Commit af825cb9e3a875e096d138d147b325ab67c2beb0

Authored by Pavel Govyadinov
2 parents f7b84fb2 cf5b4c92

fixed merge conflicts

matlab/loadAgilent.m 0 → 100644
  1 +function S = loadAgilent(filename)
  2 +
  3 +fid = fopen(filename); %open the file for reading
  4 +fseek(fid, 9, 'bof'); %skip past the first 9 bytes of the header
  5 +
  6 +bands = fread(fid, 1, 'int16'); %read the number of bands in the file
  7 +fseek(fid, 13, 'cof'); %skip the next 13 bytes in the header
  8 +
  9 +samples = fread(fid, 1, 'int16'); %read the number of samples (X)
  10 +lines = fread(fid, 1, 'int16'); %read the number of lines (Y)
  11 +
  12 +fseek(fid, 1020, 'bof'); %skip past the entire header
  13 +S = fread(fid, [samples lines*bands], 'float32'); %read all the data
  14 +S = reshape(S, [samples, lines, bands]);
  15 +fclose(fid); %close the file
  16 +
  17 +
stim/biomodels/cellset.h 0 → 100644
  1 +#ifndef STIM_CELLSET_H
  2 +#define STIM_CELLSET_H
  3 +
  4 +#include <stim/math/vec3.h>
  5 +#include <vector>
  6 +#include <unordered_map>
  7 +#include <fstream>
  8 +
  9 +namespace stim{
  10 +
  11 +class cellset{
  12 +private:
  13 + static const char delim = ' ';
  14 +protected:
  15 + std::vector<double*> cells; //vector storing field data for each cell
  16 + std::unordered_map<std::string, size_t> fields; //unordered map storing field->index information for each field
  17 + size_t ip[3]; //hard code to position indices (for speed)
  18 +
  19 + void init(){
  20 +
  21 + }
  22 +public:
  23 + /// Constructor - create an empty cell set
  24 + cellset(){
  25 + init();
  26 + }
  27 +
  28 + /// Constructor - load a cellset from a file
  29 + cellset(std::string filename){
  30 + init(); //initialize an empty cellset
  31 + load(filename); //load the cellset from an existing file
  32 + }
  33 +
  34 + /// Loads a cellset from a file
  35 + void load(std::string filename){
  36 + std::ifstream infile(filename);
  37 + std::string header; //allocate space for the file header
  38 + std::getline(infile, header); //get the file header
  39 +
  40 + // break the header into fields
  41 + std::stringstream ss(header); //create a string stream
  42 + std::string field; //store a single field name
  43 + size_t i = 0; //current field index
  44 + while (std::getline(ss, field, delim)) { //split the header into individual fields
  45 + std::pair<std::string, size_t> p(field, i); //create a pair associating the header name with the index
  46 + fields.insert(p); //insert the pair into the fields map
  47 + i++; //increment the data index
  48 + }
  49 + size_t nfields = fields.size(); //store the number of fields for each cell
  50 +
  51 + //load each cell and all associated fields
  52 + std::string cell_line; //string holds all information for a cell
  53 + std::list<std::string> cell_list; //list will be temporary storage for the cell fields
  54 + while(std::getline(infile, cell_line)){ //for each cell entry
  55 + cell_list.push_back(cell_line); //push the cell entry into the list
  56 + }
  57 +
  58 + //convert the list into actual data
  59 + size_t ncells = cell_list.size(); //count the number of cells
  60 + cells.resize(ncells); //allocate enough space in the array to store all cells
  61 + for(size_t c = 0; c < ncells; c++){ //for each cell entry in the list
  62 + cells[c] = (double*) malloc(sizeof(double) * nfields); //allocate enough space for each field
  63 + std::stringstream fss(cell_list.front()); //turn the string representing the cell list into a stringstream
  64 + for(size_t f = 0; f < nfields; f++){ //for each field
  65 + fss>>cells[c][f]; //load the field
  66 + }
  67 + cell_list.pop_front(); //pop the read string off of the front of the list
  68 + }
  69 + infile.close(); //close the input file
  70 +
  71 + ip[0] = fields["x"]; //hard code the position indices for speed
  72 + ip[1] = fields["y"]; // this assumes all cells have positions
  73 + ip[2] = fields["z"];
  74 + }
  75 +
  76 + /// Return the value a specified field for a cell
  77 + /// @param c is the cell index
  78 + /// @param f is the field
  79 + double value(size_t c, std::string f){
  80 + size_t idx = fields[f];
  81 + return cells[c][idx];
  82 + }
  83 +
  84 + /// returns an ID used to look up a field
  85 + bool exists(std::string f){
  86 + std::unordered_map<std::string, size_t>::iterator iter = fields.find(f);
  87 + if(iter == fields.end()) return false;
  88 + else return true;
  89 + }
  90 +
  91 + /// Return the position of cell [i]
  92 + stim::vec3<double> p(size_t i){
  93 + stim::vec3<double> pos(cells[i][ip[0]], cells[i][ip[1]], cells[i][ip[2]]);
  94 + return pos;
  95 + }
  96 +
  97 + /// Return the number of cells in the set
  98 + size_t size(){
  99 + return cells.size();
  100 + }
  101 +
  102 + /// Return the maximum value of a field in this cell set
  103 + double max(std::string field){
  104 + size_t idx = fields[field]; //get the field index
  105 + size_t ncells = cells.size(); //get the total number of cells
  106 + double maxval, val; //stores the current and maximum values
  107 + for(size_t c = 0; c < ncells; c++){ //for each cell
  108 + val = cells[c][idx]; //get the field value for this cell
  109 + if(c == 0) maxval = val; //if this is the first cell, just assign the maximum
  110 + else if(val > maxval) maxval = val; // otherwise text for the size of val and assign it as appropriate
  111 + }
  112 + return maxval;
  113 + }
  114 +
  115 + /// Return the maximum value of a field in this cell set
  116 + double min(std::string field){
  117 + size_t idx = fields[field]; //get the field index
  118 + size_t ncells = cells.size(); //get the total number of cells
  119 + double minval, val; //stores the current and maximum values
  120 + for(size_t c = 0; c < ncells; c++){ //for each cell
  121 + val = cells[c][idx]; //get the field value for this cell
  122 + if(c == 0) minval = val; //if this is the first cell, just assign the maximum
  123 + else if(val < minval) minval = val; // otherwise text for the size of val and assign it as appropriate
  124 + }
  125 + return minval;
  126 + }
  127 +
  128 +
  129 +}; //end class cellset
  130 +}; //end namespace stim
  131 +
  132 +#endif
0 \ No newline at end of file 133 \ No newline at end of file
stim/cuda/ivote/local_max.cuh
@@ -14,7 +14,7 @@ namespace stim{ @@ -14,7 +14,7 @@ namespace stim{
14 14
15 // calculate the 2D coordinates for this current thread. 15 // calculate the 2D coordinates for this current thread.
16 int xi = blockIdx.x * blockDim.x + threadIdx.x; 16 int xi = blockIdx.x * blockDim.x + threadIdx.x;
17 - int yi = blockIdx.y; 17 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
18 18
19 if(xi >= x || yi >= y) 19 if(xi >= x || yi >= y)
20 return; 20 return;
@@ -63,8 +63,10 @@ namespace stim{ @@ -63,8 +63,10 @@ namespace stim{
63 void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){ 63 void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
64 64
65 unsigned int max_threads = stim::maxThreadsPerBlock(); 65 unsigned int max_threads = stim::maxThreadsPerBlock();
66 - dim3 threads(max_threads, 1);  
67 - dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y); 66 + /*dim3 threads(max_threads, 1);
  67 + dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);*/
  68 + dim3 threads( sqrt(max_threads), sqrt(max_threads) );
  69 + dim3 blocks(x/threads.x + 1, y/threads.y + 1);
68 70
69 //call the kernel to find the local maximum. 71 //call the kernel to find the local maximum.
70 cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y); 72 cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y);
stim/cuda/ivote/re_sample.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_RE_SAMPLE_H
  2 +#define STIM_CUDA_RE_SAMPLE_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/templates/gaussian_blur.cuh>
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + template<typename T>
  13 + __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
  14 +
  15 + unsigned int sigma_ds = 1/resize;
  16 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  17 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  18 +
  19 +
  20 + // calculate the 2D coordinates for this current thread.
  21 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  22 + int yi = blockIdx.y;
  23 + // convert 2D coordinates to 1D
  24 + int i = yi * x + xi;
  25 +
  26 + if(xi< x && yi< y){
  27 + if(xi%sigma_ds==0){
  28 + if(yi%sigma_ds==0){
  29 + gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];
  30 + }
  31 + }
  32 + else gpuI[i] = 0;
  33 +
  34 + //int x_org = xi * sigma_ds ;
  35 + //int y_org = yi * sigma_ds ;
  36 + //int i_org = y_org * x + x_org;
  37 + //gpuI[i] = gpuI0[i_org];
  38 + }
  39 +
  40 + }
  41 +
  42 +
  43 + /// Applies a Gaussian blur to a 2D image stored on the GPU
  44 + template<typename T>
  45 + void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
  46 +
  47 +
  48 + //unsigned int sigma_ds = 1/resize;
  49 + //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  50 + //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  51 +
  52 + //get the number of pixels in the image
  53 + //unsigned int pixels_ds = x_ds * y_ds;
  54 +
  55 + unsigned int max_threads = stim::maxThreadsPerBlock();
  56 + dim3 threads(max_threads, 1);
  57 + dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
  58 +
  59 + //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
  60 +
  61 + //resample the image
  62 + cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
  63 +
  64 + }
  65 +
  66 + /// Applies a Gaussian blur to a 2D image stored on the CPU
  67 + template<typename T>
  68 + void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){
  69 +
  70 + //get the number of pixels in the image
  71 + unsigned int pixels = x*y;
  72 + unsigned int bytes = sizeof(T) * pixels;
  73 +
  74 + unsigned int sigma_ds = 1/resize;
  75 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  76 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  77 + unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
  78 +
  79 +
  80 +
  81 + //allocate space on the GPU for the original image
  82 + T* gpuI0;
  83 + cudaMalloc(&gpuI0, bytes_ds);
  84 +
  85 +
  86 + //copy the image data to the GPU
  87 + cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);
  88 +
  89 + //allocate space on the GPU for the down sampled image
  90 + T* gpuI;
  91 + cudaMalloc(&gpuI, bytes);
  92 +
  93 + //run the GPU-based version of the algorithm
  94 + gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
  95 +
  96 + //copy the image data to the GPU
  97 + cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
  98 +
  99 + cudaFree(gpuI0);
  100 + cudeFree(gpuI);
  101 + }
  102 +
  103 + }
  104 +}
  105 +
  106 +#endif
0 \ No newline at end of file 107 \ No newline at end of file
stim/cuda/ivote/update_dir_global.cuh renamed to stim/cuda/ivote/update_dir_bb.cuh
1 -#ifndef STIM_CUDA_UPDATE_DIR_GLOBALD_H  
2 -#define STIM_CUDA_UPDATE_DIR_GLOBAL_H 1 +#ifndef STIM_CUDA_UPDATE_DIR_BB_H
  2 +#define STIM_CUDA_UPDATE_DIR_BB_H
3 3
4 # include <iostream> 4 # include <iostream>
5 # include <cuda.h> 5 # include <cuda.h>
@@ -7,8 +7,7 @@ @@ -7,8 +7,7 @@
7 #include <stim/cuda/sharedmem.cuh> 7 #include <stim/cuda/sharedmem.cuh>
8 #include <stim/visualization/aabb2.h> 8 #include <stim/visualization/aabb2.h>
9 #include <stim/visualization/colormap.h> 9 #include <stim/visualization/colormap.h>
10 -#include <math.h>  
11 -#include "cpyToshare.cuh" 10 +#include <math.h>
12 11
13 //#define RMAX_TEST 8 12 //#define RMAX_TEST 8
14 13
@@ -76,68 +75,6 @@ namespace stim{ @@ -76,68 +75,6 @@ namespace stim{
76 gpuDir[i] = atan2((T)max_dy, (T)max_dx); 75 gpuDir[i] = atan2((T)max_dy, (T)max_dx);
77 } 76 }
78 77
79 - // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.  
80 - template<typename T>  
81 - __global__ void leila_cuda_update_dir(T* gpuDir, T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){  
82 -  
83 -  
84 - // calculate the 2D coordinates for this current thread.  
85 - int xi = blockIdx.x * blockDim.x + threadIdx.x;  
86 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
87 -  
88 - if(xi >= x || yi >= y) return; //if the index is outside of the image, terminate the kernel  
89 -  
90 - int i = yi * x + xi; // convert 2D coordinates to 1D  
91 -  
92 - float theta = gpuGrad[2*i]; // calculate the voting direction based on the grtadient direction - global memory fetch  
93 - gpuDir[i] = 0; //initialize the vote direction to zero  
94 - float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter  
95 - int id_x = 0; // define two local variables for the x and y position of the maximum  
96 - int id_y = 0;  
97 -  
98 - int x_table = 2*rmax +1; // compute the size of window which will be checked for finding the voting area for this voter  
99 - int rmax_sq = rmax * rmax;  
100 - int tx_rmax = threadIdx.x + rmax;  
101 - float atan_angle;  
102 - float vote_c;  
103 - int xidx, yidx, yr_sq, xr_sq;  
104 - for(int yr = -rmax; yr <= rmax; yr++){  
105 - yidx = yi + yr; //compute the index into the image  
106 - if (yidx >= 0 && yidx < y){ //if the current y-index is inside the image  
107 - yr_sq = yr * yr; //compute the square of yr, to save time later  
108 - for(int xr = -rmax; xr <= rmax; xr++){  
109 - xidx = xi + xr;  
110 - if(xidx >= 0 && xidx < x){  
111 - xr_sq = xr * xr;  
112 - unsigned int ind_t = (rmax - yr) * x_table + rmax - xr;  
113 -  
114 - // calculate the angle between the voter and the current pixel in x and y directions  
115 - atan_angle = gpuTable[ind_t];  
116 - //atan_angle = atan2((T)yr, (T)xr);  
117 -  
118 - // check if the current pixel is located in the voting area of this voter.  
119 - if (((xr_sq + yr_sq)< rmax_sq) && (abs(atan_angle - theta) <phi)){  
120 -  
121 - vote_c = gpuVote[yidx * x + xidx]; // find the vote value for the current counter  
122 - // compare the vote value of this pixel with the max value to find the maxima and its index.  
123 - if (vote_c>max) {  
124 -  
125 - max = vote_c;  
126 - id_x = xr;  
127 - id_y = yr;  
128 - }  
129 - }  
130 - }  
131 - }  
132 - }  
133 - }  
134 -  
135 - unsigned int ind_m = (rmax - id_y) * x_table + (rmax - id_x);  
136 - float new_angle = gpuTable[ind_m];  
137 -  
138 - if(xi < x && yi < y)  
139 - gpuDir[i] = new_angle;  
140 - } //end kernel  
141 78
142 79
143 // this kernel updates the gradient direction by the calculated voting direction. 80 // this kernel updates the gradient direction by the calculated voting direction.
@@ -168,9 +105,7 @@ namespace stim{ @@ -168,9 +105,7 @@ namespace stim{
168 HANDLE_ERROR( cudaMalloc(&gpuDir, bytes) ); 105 HANDLE_ERROR( cudaMalloc(&gpuDir, bytes) );
169 106
170 unsigned int max_threads = stim::maxThreadsPerBlock(); 107 unsigned int max_threads = stim::maxThreadsPerBlock();
171 - //dim3 threads(min(x, max_threads), 1);  
172 - //dim3 blocks(x/threads.x, y);  
173 - 108 +
174 dim3 threads( sqrt(max_threads), sqrt(max_threads) ); 109 dim3 threads( sqrt(max_threads), sqrt(max_threads) );
175 dim3 blocks(x/threads.x + 1, y/threads.y + 1); 110 dim3 blocks(x/threads.x + 1, y/threads.y + 1);
176 111
@@ -188,12 +123,12 @@ namespace stim{ @@ -188,12 +123,12 @@ namespace stim{
188 123
189 //call the kernel to calculate the new voting direction 124 //call the kernel to calculate the new voting direction
190 cuda_update_dir <<< blocks, threads, shared_mem_req>>>(gpuDir, gpuVote, gpuGrad, gpuTable, phi, rmax, x , y); 125 cuda_update_dir <<< blocks, threads, shared_mem_req>>>(gpuDir, gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
191 - stim::gpu2image<T>(gpuDir, "dir_david.bmp", x, y, -pi, pi, stim::cmBrewer); 126 + //stim::gpu2image<T>(gpuDir, "dir_david.bmp", x, y, -pi, pi, stim::cmBrewer);
192 127
193 //exit(0); 128 //exit(0);
194 129
195 - threads = dim3( sqrt(max_threads), sqrt(max_threads) );  
196 - blocks = dim3(x/threads.x + 1, y/threads.y + 1); 130 + //threads = dim3( sqrt(max_threads), sqrt(max_threads) );
  131 + //blocks = dim3(x/threads.x + 1, y/threads.y + 1);
197 132
198 //call the kernel to update the gradient direction 133 //call the kernel to update the gradient direction
199 cuda_update_grad <<< blocks, threads >>>(gpuGrad, gpuDir, x , y); 134 cuda_update_grad <<< blocks, threads >>>(gpuGrad, gpuDir, x , y);
stim/cuda/ivote/david_update_dir_global.cuh renamed to stim/cuda/ivote/update_dir_threshold_global.cuh
1 -#ifndef STIM_CUDA_UPDATE_DIR_GLOBALD_H  
2 -#define STIM_CUDA_UPDATE_DIR_GLOBAL_H 1 +#ifndef STIM_CUDA_UPDATE_DIR_THRESHOLD_GLOBALD_H
  2 +#define STIM_CUDA_UPDATE_DIR_THRESHOLD_GLOBAL_H
3 3
4 # include <iostream> 4 # include <iostream>
5 # include <cuda.h> 5 # include <cuda.h>
6 #include <stim/cuda/cudatools.h> 6 #include <stim/cuda/cudatools.h>
7 #include <stim/cuda/sharedmem.cuh> 7 #include <stim/cuda/sharedmem.cuh>
8 -#include <math.h>  
9 -#include "cpyToshare.cuh"  
10 -  
11 -#define RMAX_TEST 8 8 +#include "cpyToshare.cuh"
12 9
13 namespace stim{ 10 namespace stim{
14 namespace cuda{ 11 namespace cuda{
15 12
16 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area. 13 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
17 template<typename T> 14 template<typename T>
18 - __global__ void cuda_update_dir(T* gpuDir, T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){  
19 - extern __shared__ T atan2_table[];  
20 -  
21 - //calculate the start point for this block  
22 - //int bxi = blockIdx.x * blockDim.x;  
23 -  
24 - stim::cuda::sharedMemcpy(atan2_table, gpuTable, (2 * rmax + 1) * (2 * rmax + 1), threadIdx.x, blockDim.x); 15 + __global__ void cuda_update_dir(T* gpuDir, T* gpuVote, T* gpuTh, T* gpuTable, T phi, int rmax, int th_size, int x, int y){
25 16
26 - __syncthreads();  
27 17
28 - // calculate the 2D coordinates for this current thread.  
29 - //int xi = bxi + threadIdx.x; 18 +
  19 + // calculate the coordinate for this current thread.
30 int xi = blockIdx.x * blockDim.x + threadIdx.x; 20 int xi = blockIdx.x * blockDim.x + threadIdx.x;
31 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
32 - if(xi >= x || yi >= y) return; //if the index is outside of the image, terminate the kernel  
33 -  
34 - int i = yi * x + xi; // convert 2D coordinates to 1D 21 + // calculate the voting direction based on the grtadient direction
  22 + float theta = gpuTh[3*xi];
35 23
36 - float theta = gpuGrad[2*i]; // calculate the voting direction based on the grtadient direction - global memory fetch  
37 - gpuDir[i] = 0; //initialize the vote direction to zero  
38 - float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter  
39 - int id_x = 0; // define two local variables for the x and y position of the maximum  
40 - int id_y = 0; 24 + //calculate the position and x, y coordinations of this voter in the original image
  25 + unsigned int i_v = gpuTh[3*xi+2];
  26 + unsigned int y_v = i_v/x;
  27 + unsigned int x_v = i_v - (y_v*x);
41 28
42 - int x_table = 2*rmax +1; // compute the size of window which will be checked for finding the voting area for this voter 29 + //initialize the vote direction to zero
  30 + gpuDir[xi] = 0;
  31 +
  32 + // define a local variable to maximum value of the vote image in the voting area for this voter
  33 + float max = 0;
  34 +
  35 + // define two local variables for the x and y coordinations where the maximum happened
  36 + int id_x = 0;
  37 + int id_y = 0;
  38 +
  39 + // compute the size of window which will be checked for finding the voting area for this voter
  40 + int x_table = 2*rmax +1;
43 int rmax_sq = rmax * rmax; 41 int rmax_sq = rmax * rmax;
44 int tx_rmax = threadIdx.x + rmax; 42 int tx_rmax = threadIdx.x + rmax;
45 - float atan_angle;  
46 - float vote_c;  
47 - unsigned int ind_t;  
48 - for(int yr = -rmax; yr <= rmax; yr++){ //for each counter in the y direction  
49 - if (yi+yr >= 0 && yi + yr < y){ //if the counter exists (we aren't looking outside of the image)  
50 - for(int xr = -rmax; xr <= rmax; xr++){ //for each counter in the x direction  
51 - if((xr * xr + yr *yr)< rmax_sq){ //if the counter is within range of the voter  
52 -  
53 - ind_t = (rmax - yr) * x_table + rmax - xr; //calculate the index to the atan2 table  
54 - atan_angle = atan2_table[ind_t]; //retrieve the direction vector from the table  
55 -  
56 - //atan_angle = atan2((float)yr, (float)xr);  
57 -  
58 - if (abs(atan_angle - theta) <phi){ // check if the current pixel is located in the voting angle of this voter.  
59 - vote_c = gpuVote[(yi+yr)*x + (xi+xr)]; // find the vote value for the current counter  
60 - if(vote_c>max) { // compare the vote value of this pixel with the max value to find the maxima and its index.  
61 - max = vote_c;  
62 - id_x = xr;  
63 - id_y = yr;  
64 - } 43 + if(xi < th_size){
  44 +
  45 + for(int yr = -rmax; yr <= rmax; yr++){
  46 +
  47 + for(int xr = -rmax; xr <= rmax; xr++){
  48 +
  49 + unsigned int ind_t = (rmax - yr) * x_table + rmax - xr;
  50 +
  51 + // find the angle between the voter and the current pixel in x and y directions
  52 + float atan_angle = gpuTable[ind_t];
  53 +
  54 + // check if the current pixel is located in the voting area of this voter.
  55 + if (((xr * xr + yr *yr)< rmax_sq) && (abs(atan_angle - theta) <phi)){
  56 + // find the vote value for the current counter
  57 + float vote_c = gpuVote[(y_v+yr)*x + (x_v+xr)];
  58 + // compare the vote value of this pixel with the max value to find the maxima and its index.
  59 + if (vote_c>max) {
  60 +
  61 + max = vote_c;
  62 + id_x = xr;
  63 + id_y = yr;
65 } 64 }
66 } 65 }
67 } 66 }
68 } 67 }
69 - } 68 +
70 69
71 - unsigned int ind_m = (rmax - id_y) * x_table + (rmax - id_x);  
72 - float new_angle = gpuTable[ind_m]; 70 + unsigned int ind_m = (rmax - id_y) * x_table + (rmax - id_x);
  71 + float new_angle = gpuTable[ind_m];
  72 + gpuDir[xi] = new_angle;
  73 + }
73 74
74 - if(xi < x && yi < y)  
75 - gpuDir[i] = new_angle;  
76 - } //end kernel 75 + }
77 76
78 // this kernel updates the gradient direction by the calculated voting direction. 77 // this kernel updates the gradient direction by the calculated voting direction.
79 template<typename T> 78 template<typename T>
80 - __global__ void cuda_update_grad(T* gpuGrad, T* gpuDir, int x, int y){ 79 + __global__ void cuda_update_grad(T* gpuTh, T* gpuDir, int th_size, int x, int y){
81 80
82 - // calculate the 2D coordinates for this current thread. 81 + // calculate the coordinate for this current thread.
83 int xi = blockIdx.x * blockDim.x + threadIdx.x; 82 int xi = blockIdx.x * blockDim.x + threadIdx.x;
84 - int yi = blockIdx.y * blockDim.y + threadIdx.y;  
85 -  
86 - // convert 2D coordinates to 1D  
87 - int i = yi * x + xi;  
88 83
  84 +
89 //update the gradient image with the vote direction 85 //update the gradient image with the vote direction
90 - gpuGrad[2*i] = gpuDir[i]; 86 + gpuTh[3*xi] = gpuDir[xi];
91 } 87 }
92 88
93 template<typename T> 89 template<typename T>
94 - void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){  
95 -  
96 - 90 + void gpu_update_dir(T* gpuVote, T* gpuTh, T* gpuTable, T phi, unsigned int rmax, unsigned int th_size, unsigned int x, unsigned int y){
97 91
98 //calculate the number of bytes in the array 92 //calculate the number of bytes in the array
99 - unsigned int bytes = x * y * sizeof(T); 93 + unsigned int bytes_th = th_size* sizeof(T);
100 94
101 unsigned int max_threads = stim::maxThreadsPerBlock(); 95 unsigned int max_threads = stim::maxThreadsPerBlock();
102 -  
103 - dim3 threads(sqrt(max_threads), sqrt(max_threads));  
104 - dim3 blocks(x/threads.x + 1, y/threads.y + 1);  
105 -  
106 - 96 + dim3 threads(max_threads);
  97 + dim3 blocks(th_size/threads.x+1);
107 98
108 // allocate space on the GPU for the updated vote direction 99 // allocate space on the GPU for the updated vote direction
109 T* gpuDir; 100 T* gpuDir;
110 - cudaMalloc(&gpuDir, bytes);  
111 -  
112 - size_t shared_mem = sizeof(T) * std::pow((2 * rmax + 1), 2);  
113 - std::cout<<"Shared memory for atan2 table: "<<shared_mem<<std::endl; 101 + cudaMalloc(&gpuDir, bytes_th);
114 102
115 //call the kernel to calculate the new voting direction 103 //call the kernel to calculate the new voting direction
116 - cuda_update_dir <<< blocks, threads, shared_mem>>>(gpuDir, gpuVote, gpuGrad, gpuTable, phi, rmax, x , y); 104 + cuda_update_dir <<< blocks, threads>>>(gpuDir, gpuVote, gpuTh, gpuTable, phi, rmax, th_size, x , y);
117 105
118 //call the kernel to update the gradient direction 106 //call the kernel to update the gradient direction
119 - cuda_update_grad <<< blocks, threads >>>(gpuGrad, gpuDir, x , y); 107 + cuda_update_grad <<< blocks, threads >>>(gpuTh, gpuDir, th_size, x , y);
120 108
121 //free allocated memory 109 //free allocated memory
122 cudaFree(gpuDir); 110 cudaFree(gpuDir);
stim/cuda/ivote/vote_atomic_bb.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE_ATOMIC_BB_H
  2 +#define STIM_CUDA_VOTE_ATOMIC_BB_H
  3 +
  4 +# include <iostream>
  5 +# include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/sharedmem.cuh>
  8 +#include <stim/visualization/aabb2.h>
  9 +#include <stim/visualization/colormap.h>
  10 +#include <math.h>
  11 +
  12 +namespace stim{
  13 + namespace cuda{
  14 +
  15 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  16 + template<typename T>
  17 + __global__ void cuda_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){
  18 +
  19 + extern __shared__ T S[];
  20 + T* shared_atan = S;
  21 + size_t n_table = (rmax * 2 + 1) * (rmax * 2 + 1);
  22 + stim::cuda::threadedMemcpy((char*)shared_atan, (char*)gpuTable, sizeof(T) * n_table, threadIdx.x, blockDim.x);
  23 +
  24 + // calculate the 2D coordinates for this current thread.
  25 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  26 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
  27 +
  28 + if(xi >= x || yi >= y) return;
  29 + // convert 2D coordinates to 1D
  30 + int i = yi * x + xi;
  31 +
  32 + // calculate the voting direction based on the grtadient direction
  33 + float theta = gpuGrad[2*i];
  34 + //calculate the amount of vote for the voter
  35 + float mag = gpuGrad[2*i + 1];
  36 +
  37 +
  38 + stim::aabb2<int> bb(xi, yi); //initialize a bounding box at the current point
  39 + bb.insert(xi + ceil(rmax * cos(theta)), ceil(yi + rmax * sin(theta)));
  40 + bb.insert(xi + ceil(rmax * cos(theta - phi)), yi + ceil(rmax * sin(theta - phi))); //insert one corner of the triangle into the bounding box
  41 + bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box
  42 +
  43 + // compute the size of window which will be checked for finding the proper voters for this pixel
  44 + int x_table = 2*rmax +1;
  45 + int rmax_sq = rmax * rmax;
  46 +
  47 + int lut_i;
  48 + T dx_sq, dy_sq;
  49 +
  50 + bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image
  51 + bb.trim_high(x-1, y-1);
  52 +
  53 + int by, bx;
  54 + int dx, dy;
  55 +
  56 + unsigned int ind_g; //initialize the maximum vote value to zero
  57 + T alpha;
  58 +
  59 + for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box
  60 + dy = by - yi; //calculate the y coordinate of the current point relative to yi
  61 + dy_sq = dy * dy;
  62 + for(bx = bb.low[0]; bx <= bb.high[0]; bx++){
  63 + dx = bx - xi;
  64 + dx_sq = dx * dx;
  65 + lut_i = (rmax - dy) * x_table + rmax - dx;
  66 + alpha = shared_atan[lut_i];
  67 + if(dx_sq + dy_sq < rmax_sq && abs(alpha - theta) < phi){
  68 + ind_g = (by)*x + (bx);
  69 + atomicAdd(&gpuVote[ind_g], mag);
  70 +
  71 + }
  72 + }
  73 + }
  74 +
  75 + }
  76 +
  77 +
  78 + template<typename T>
  79 + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  80 +
  81 +
  82 + unsigned int max_threads = stim::maxThreadsPerBlock();
  83 + dim3 threads( sqrt(max_threads), sqrt(max_threads) );
  84 + dim3 blocks(x/threads.x + 1, y/threads.y + 1);
  85 + size_t table_bytes = sizeof(T) * (rmax * 2 + 1) * (rmax * 2 + 1);
  86 + size_t shared_mem_req = table_bytes;// + template_bytes;
  87 + std::cout<<"Shared Memory required: "<<shared_mem_req<<std::endl;
  88 + size_t shared_mem = stim::sharedMemPerBlock();
  89 + if(shared_mem_req > shared_mem){
  90 + std::cout<<"Error: insufficient shared memory for this implementation of cuda_update_dir()."<<std::endl;
  91 + exit(1);
  92 + }
  93 +
  94 + //call the kernel to do the voting
  95 + cuda_vote <<< blocks, threads, shared_mem_req>>>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  96 +
  97 + }
  98 +
  99 +
  100 + template<typename T>
  101 + void cpu_vote(T* cpuVote, T* cpuGrad,T* cpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  102 +
  103 + //calculate the number of bytes in the array
  104 + unsigned int bytes = x * y * sizeof(T);
  105 +
  106 + //calculate the number of bytes in the atan2 table
  107 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(T);
  108 +
  109 + //allocate space on the GPU for the Vote Image
  110 + T* gpuVote;
  111 + cudaMalloc(&gpuVote, bytes);
  112 +
  113 + //allocate space on the GPU for the input Gradient image
  114 + T* gpuGrad;
  115 + HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes*2));
  116 +
  117 + //copy the Gradient Magnitude data to the GPU
  118 + HANDLE_ERROR(cudaMemcpy(gpuGrad, cpuGrad, bytes*2, cudaMemcpyHostToDevice));
  119 +
  120 + //allocate space on the GPU for the atan2 table
  121 + T* gpuTable;
  122 + HANDLE_ERROR(cudaMalloc(&gpuTable, bytes_table));
  123 +
  124 + //copy the atan2 values to the GPU
  125 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice));
  126 +
  127 + //call the GPU version of the vote calculation function
  128 + gpu_vote<T>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  129 +
  130 + //copy the Vote Data back to the CPU
  131 + cudaMemcpy(cpuVote, gpuVote, bytes, cudaMemcpyDeviceToHost) ;
  132 +
  133 + //free allocated memory
  134 + cudaFree(gpuTable);
  135 + cudaFree(gpuVote);
  136 + cudaFree(gpuGrad);
  137 + }
  138 +
  139 + }
  140 +}
  141 +
  142 +#endif
0 \ No newline at end of file 143 \ No newline at end of file
stim/cuda/ivote/vote_atomic_shared.cuh
@@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
5 # include <cuda.h> 5 # include <cuda.h>
6 #include <stim/cuda/cudatools.h> 6 #include <stim/cuda/cudatools.h>
7 #include <stim/cuda/sharedmem.cuh> 7 #include <stim/cuda/sharedmem.cuh>
8 -#include "cpyToshare.cuh" 8 +
9 //#include "writebackshared.cuh" 9 //#include "writebackshared.cuh"
10 namespace stim{ 10 namespace stim{
11 namespace cuda{ 11 namespace cuda{
stim/cuda/ivote/vote_shared.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE_SHARED_H
  2 +#define STIM_CUDA_VOTE_SHARED
  3 +# include <iostream>
  4 +# include <cuda.h>
  5 +#include <stim/cuda/cudatools.h>
  6 +#include <stim/cuda/sharedmem.cuh>
  7 +#include "cpyToshare.cuh"
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  13 + template<typename T>
  14 + __global__ void cuda_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){
  15 +
  16 + //generate a pointer to shared memory (size will be specified as a kernel parameter)
  17 + extern __shared__ float s_grad[];
  18 +
  19 + //calculate the start point for this block
  20 + int bxi = blockIdx.x * blockDim.x;
  21 +
  22 + // calculate the 2D coordinates for this current thread.
  23 + int xi = bxi + threadIdx.x;
  24 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
  25 + // convert 2D coordinates to 1D
  26 + int i = yi * x + xi;
  27 +
  28 + // define a local variable to sum the votes from the voters
  29 + float sum = 0;
  30 +
  31 + //calculate the width of the shared memory block
  32 + int swidth = 2 * rmax + blockDim.x;
  33 +
  34 + // compute the size of window which will be checked for finding the proper voters for this pixel
  35 + int x_table = 2*rmax +1;
  36 + int rmax_sq = rmax * rmax;
  37 + int tx_rmax = threadIdx.x + rmax;
  38 + int bxs = bxi - rmax;
  39 +
  40 + //for every line (along y)
  41 + for(int yr = -rmax; yr <= rmax; yr++){
  42 + if (yi+yr<y && yi+yr>=0){
  43 + //copy the portion of the image necessary for this block to shared memory
  44 + __syncthreads();
  45 + cpyG2S1D2ch<float>(s_grad, gpuGrad, bxs, yi + yr , 2*swidth, 1, threadIdx, blockDim, x, y);
  46 + __syncthreads();
  47 +
  48 + if(xi < x && yi < y){
  49 +
  50 + for(int xr = -rmax; xr <= rmax; xr++){
  51 +
  52 + //find the location of this voter in the atan2 table
  53 + int id_t = (yr + rmax) * x_table + xr + rmax;
  54 +
  55 + // calculate the angle between the pixel and the current voter in x and y directions
  56 + float atan_angle = gpuTable[id_t];
  57 +
  58 + // calculate the voting direction based on the grtadient direction
  59 + int idx_share = xr + tx_rmax ;
  60 + float theta = s_grad[idx_share*2];
  61 + float mag = s_grad[idx_share*2 + 1];
  62 +
  63 +
  64 + // check if the current voter is located in the voting area of this pixel.
  65 + if (((xr * xr + yr *yr)< rmax_sq) && (abs(atan_angle - theta) <phi)){
  66 + sum += mag;
  67 +
  68 + }
  69 + }
  70 +
  71 + }
  72 + }
  73 + }
  74 + if(xi < x && yi < y)
  75 + gpuVote[i] = sum;
  76 +
  77 + }
  78 +
  79 + template<typename T>
  80 + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  81 +
  82 +
  83 + unsigned int max_threads = stim::maxThreadsPerBlock();
  84 + dim3 threads(max_threads, 1);
  85 + dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
  86 +
  87 +
  88 + // specify share memory
  89 + unsigned int share_bytes = (2*rmax + threads.x)*1*2*sizeof(T);
  90 +
  91 + //call the kernel to do the voting
  92 + cuda_vote <<< blocks, threads,share_bytes >>>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  93 +
  94 + }
  95 +
  96 +
  97 + template<typename T>
  98 + void cpu_vote(T* cpuVote, T* cpuGrad,T* cpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  99 +
  100 + //calculate the number of bytes in the array
  101 + unsigned int bytes = x * y * sizeof(T);
  102 +
  103 + //calculate the number of bytes in the atan2 table
  104 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(T);
  105 +
  106 + //allocate space on the GPU for the Vote Image
  107 + T* gpuVote;
  108 + cudaMalloc(&gpuVote, bytes);
  109 +
  110 + //allocate space on the GPU for the input Gradient image
  111 + T* gpuGrad;
  112 + HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes*2));
  113 +
  114 + //copy the Gradient Magnitude data to the GPU
  115 + HANDLE_ERROR(cudaMemcpy(gpuGrad, cpuGrad, bytes*2, cudaMemcpyHostToDevice));
  116 +
  117 + //allocate space on the GPU for the atan2 table
  118 + T* gpuTable;
  119 + HANDLE_ERROR(cudaMalloc(&gpuTable, bytes_table));
  120 +
  121 + //copy the atan2 values to the GPU
  122 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice));
  123 +
  124 + //call the GPU version of the vote calculation function
  125 + gpu_vote<T>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  126 +
  127 + //copy the Vote Data back to the CPU
  128 + cudaMemcpy(cpuVote, gpuVote, bytes, cudaMemcpyDeviceToHost) ;
  129 +
  130 + //free allocated memory
  131 + cudaFree(gpuTable);
  132 + cudaFree(gpuVote);
  133 + cudaFree(gpuGrad);
  134 + }
  135 +
  136 + }
  137 +}
  138 +
  139 +#endif
0 \ No newline at end of file 140 \ No newline at end of file
stim/cuda/ivote/vote_threshold_global.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE_THRESHOLD_GLOBAL_H
  2 +#define STIM_CUDA_VOTE_THRESHOLD_GLOBAL_H
  3 +# include <iostream>
  4 +# include <cuda.h>
  5 +#include <stim/cuda/cudatools.h>
  6 +#include <stim/cuda/sharedmem.cuh>
  7 +#include "cpyToshare.cuh"
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  13 + template<typename T>
  14 + __global__ void cuda_vote(T* gpuVote, T* gpuTh, T* gpuTable, T phi, int rmax, int th_size, int x, int y){
  15 +
  16 +
  17 + // calculate the x coordinate for this current thread.
  18 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  19 +
  20 + // calculate the voting direction based on the grtadient direction
  21 + float theta = gpuTh[3*xi];
  22 + //find the gradient magnitude for the current voter
  23 + float mag = gpuTh[3*xi + 1];
  24 + //calculate the position and x, y coordinations of this voter in the original image
  25 + unsigned int i_v = gpuTh[3*xi+2];
  26 + unsigned int y_v = i_v/x;
  27 + unsigned int x_v = i_v - (y_v*x);
  28 +
  29 + // compute the size of window which will be checked for finding the proper voters for this pixel
  30 + int x_table = 2*rmax +1;
  31 + int rmax_sq = rmax * rmax;
  32 + if(xi < th_size){
  33 + for(int yr = -rmax; yr <= rmax; yr++){
  34 + for(int xr = -rmax; xr <= rmax; xr++){
  35 + if ((y_v+yr)>=0 && (y_v+yr)<y && (x_v+xr)>=0 && (x_v+xr)<x){
  36 +
  37 + //find the location of the current pixel in the atan2 table
  38 + unsigned int ind_t = (rmax - yr) * x_table + rmax - xr;
  39 +
  40 + // calculate the angle between the voter and the current pixel in x and y directions
  41 + float atan_angle = gpuTable[ind_t];
  42 +
  43 + // check if the current pixel is located in the voting area of this voter.
  44 + if (((xr * xr + yr *yr)< rmax_sq) && (abs(atan_angle - theta) <phi)){
  45 + // calculate the 1D index for the current pixel in global memory
  46 + unsigned int ind_g = (y_v+yr)*x + (x_v+xr);
  47 + atomicAdd(&gpuVote[ind_g], mag);
  48 +
  49 + }
  50 + }
  51 + }
  52 + }
  53 + }
  54 + }
  55 +
  56 + template<typename T>
  57 + void gpu_vote(T* gpuVote, T* gpuTh, T* gpuTable, T phi, unsigned int rmax, unsigned int th_size, unsigned int x, unsigned int y){
  58 +
  59 +
  60 + unsigned int max_threads = stim::maxThreadsPerBlock();
  61 + dim3 threads(max_threads);
  62 + dim3 blocks(th_size/threads.x + 1);
  63 +
  64 + //call the kernel to do the voting
  65 + cuda_vote <<< blocks, threads>>>(gpuVote, gpuTh, gpuTable, phi, rmax, th_size, x , y);
  66 +
  67 + }
  68 +
  69 +
  70 + template<typename T>
  71 + void cpu_vote(T* cpuVote, T* cpuGrad,T* cpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  72 +
  73 + //calculate the number of bytes in the array
  74 + unsigned int bytes = x * y * sizeof(T);
  75 +
  76 + //calculate the number of bytes in the atan2 table
  77 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(T);
  78 +
  79 + //allocate space on the GPU for the Vote Image
  80 + T* gpuVote;
  81 + cudaMalloc(&gpuVote, bytes);
  82 +
  83 + //allocate space on the GPU for the input Gradient image
  84 + T* gpuGrad;
  85 + HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes*2));
  86 +
  87 + //copy the Gradient Magnitude data to the GPU
  88 + HANDLE_ERROR(cudaMemcpy(gpuGrad, cpuGrad, bytes*2, cudaMemcpyHostToDevice));
  89 +
  90 + //allocate space on the GPU for the atan2 table
  91 + T* gpuTable;
  92 + HANDLE_ERROR(cudaMalloc(&gpuTable, bytes_table));
  93 +
  94 + //copy the atan2 values to the GPU
  95 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice));
  96 +
  97 + //call the GPU version of the vote calculation function
  98 + gpu_vote<T>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  99 +
  100 + //copy the Vote Data back to the CPU
  101 + cudaMemcpy(cpuVote, gpuVote, bytes, cudaMemcpyDeviceToHost) ;
  102 +
  103 + //free allocated memory
  104 + cudaFree(gpuTable);
  105 + cudaFree(gpuVote);
  106 + cudaFree(gpuGrad);
  107 + }
  108 +
  109 + }
  110 +}
  111 +
  112 +#endif
0 \ No newline at end of file 113 \ No newline at end of file
stim/cuda/ivote_atomic.cuh renamed to stim/cuda/ivote_atomic_bb.cuh
1 -#ifndef STIM_CUDA_IVOTE_ATOMIC_H  
2 -#define STIM_CUDA_IVOTE_ATOMIC_H 1 +#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H
  2 +#define STIM_CUDA_IVOTE_ATOMIC_BB_H
3 3
4 #include <stim/cuda/ivote/down_sample.cuh> 4 #include <stim/cuda/ivote/down_sample.cuh>
5 #include <stim/cuda/ivote/local_max.cuh> 5 #include <stim/cuda/ivote/local_max.cuh>
6 -#include <stim/cuda/ivote/update_dir_global.cuh>  
7 -//#include <stim/cuda/ivote/vote_shared_32-32.cuh>  
8 -#include <stim/cuda/ivote/vote_atomic_shared.cuh>  
9 -//#include <stim/cuda/ivote/re_sample.cuh> 6 +#include <stim/cuda/ivote/update_dir_bb.cuh>
  7 +#include <stim/cuda/ivote/vote_atomic_bb.cuh>
  8 +
10 namespace stim{ 9 namespace stim{
11 namespace cuda{ 10 namespace cuda{
12 11
stim/envi/agilent_binary.h
@@ -35,26 +35,28 @@ public: @@ -35,26 +35,28 @@ public:
35 void alloc(){ 35 void alloc(){
36 ptr = (T*) malloc(bytes()); 36 ptr = (T*) malloc(bytes());
37 } 37 }
38 - void alloc(short x, short y, short z){ 38 + void alloc(size_t x, size_t y, size_t z){
39 R[0] = x; 39 R[0] = x;
40 R[1] = y; 40 R[1] = y;
41 R[2] = z; 41 R[2] = z;
42 alloc(); 42 alloc();
43 } 43 }
44 44
  45 + /// Create a deep copy of an agilent_binary object
45 void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){ 46 void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){
46 dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory 47 dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory
47 memcpy(dst->ptr, src->ptr, bytes()); //copy the data 48 memcpy(dst->ptr, src->ptr, bytes()); //copy the data
48 memcpy(dst->Z, src->Z, sizeof(double) * 2); //copy the data z range 49 memcpy(dst->Z, src->Z, sizeof(double) * 2); //copy the data z range
49 } 50 }
50 51
  52 + /// Default constructor, sets the resolution to zero and the data pointer to NULL
51 agilent_binary(){ 53 agilent_binary(){
52 - memset(R, 0, sizeof(short) * 3); //set the resolution to zero 54 + memset(R, 0, sizeof(size_t) * 3); //set the resolution to zero
53 ptr = NULL; 55 ptr = NULL;
54 } 56 }
55 57
56 /// Constructor with resolution 58 /// Constructor with resolution
57 - agilent_binary(short x, short y, short z){ 59 + agilent_binary(size_t x, size_t y, size_t z){
58 alloc(x, y, z); 60 alloc(x, y, z);
59 } 61 }
60 62
@@ -109,13 +111,11 @@ public: @@ -109,13 +111,11 @@ public:
109 111
110 char zero = 0; 112 char zero = 0;
111 for(size_t i = 0; i < 9; i++) outfile.write(&zero, 1); //write 9 zeros 113 for(size_t i = 0; i < 9; i++) outfile.write(&zero, 1); //write 9 zeros
112 - outfile.write((char*)&R[0], 2); 114 + outfile.write((char*)&R[2], 2);
113 for(size_t i = 0; i < 13; i++) outfile.write(&zero, 1); //write 13 zeros 115 for(size_t i = 0; i < 13; i++) outfile.write(&zero, 1); //write 13 zeros
  116 + outfile.write((char*)&R[0], 2);
114 outfile.write((char*)&R[1], 2); 117 outfile.write((char*)&R[1], 2);
115 - outfile.write((char*)&R[2], 2);  
116 for(size_t i = 0; i < 992; i++) outfile.write(&zero, 1); //write 992 zeros 118 for(size_t i = 0; i < 992; i++) outfile.write(&zero, 1); //write 992 zeros
117 - //char zerovec[1020];  
118 - //outfile.write((char*)zerovec, 1020);  
119 119
120 size_t b = bytes(); 120 size_t b = bytes();
121 outfile.write((char*)ptr, b); //write the data to the output file 121 outfile.write((char*)ptr, b); //write the data to the output file
@@ -149,7 +149,7 @@ public: @@ -149,7 +149,7 @@ public:
149 149
150 #ifdef CUDA_FOUND 150 #ifdef CUDA_FOUND
151 /// Perform an FFT and return a binary file with bands in the specified range 151 /// Perform an FFT and return a binary file with bands in the specified range
152 - agilent_binary<T> fft(float band_min, float band_max){ 152 + agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){
153 auto total_start = std::chrono::high_resolution_clock::now(); 153 auto total_start = std::chrono::high_resolution_clock::now();
154 154
155 auto start = std::chrono::high_resolution_clock::now(); 155 auto start = std::chrono::high_resolution_clock::now();
@@ -177,8 +177,8 @@ public: @@ -177,8 +177,8 @@ public:
177 177
178 start = std::chrono::high_resolution_clock::now(); 178 start = std::chrono::high_resolution_clock::now();
179 int N[1]; //create an array with the interferogram size (required for cuFFT input) 179 int N[1]; //create an array with the interferogram size (required for cuFFT input)
180 - N[0] = R[2]; //set the only array value to the interferogram size  
181 - if(cufftPlanMany(&plan, 1, N, NULL, 1, R[2], NULL, 1, R[2], CUFFT_R2C, batch) != CUFFT_SUCCESS){ 180 + N[0] = (int)R[2]; //set the only array value to the interferogram size
  181 + if(cufftPlanMany(&plan, 1, N, NULL, 1, (int)R[2], NULL, 1, (int)R[2], CUFFT_R2C, (int)batch) != CUFFT_SUCCESS){
182 std::cout<<"cuFFT Error: unable to create 1D plan."<<std::endl; 182 std::cout<<"cuFFT Error: unable to create 1D plan."<<std::endl;
183 exit(1); 183 exit(1);
184 } 184 }
@@ -199,12 +199,13 @@ public: @@ -199,12 +199,13 @@ public:
199 std::complex<T>* cpu_fft = (std::complex<T>*) malloc( R[0] * R[1] * (R[2]/2+1) * sizeof(std::complex<T>) ); 199 std::complex<T>* cpu_fft = (std::complex<T>*) malloc( R[0] * R[1] * (R[2]/2+1) * sizeof(std::complex<T>) );
200 HANDLE_ERROR(cudaMemcpy(cpu_fft, gpu_fft, R[0] * R[1] * (R[2]/2+1) * sizeof(cufftComplex), cudaMemcpyDeviceToHost)); //copy data from the host to the device 200 HANDLE_ERROR(cudaMemcpy(cpu_fft, gpu_fft, R[0] * R[1] * (R[2]/2+1) * sizeof(cufftComplex), cudaMemcpyDeviceToHost)); //copy data from the host to the device
201 201
202 - double int_delta = 0.00012656; //interferogram sample spacing in centimeters 202 + //double int_delta = 0.00012656; //interferogram sample spacing in centimeters
  203 + double int_delta = (1.0 / ELWN) * ((double)UDR / 2.0); //calculate the interferogram spacing
203 double int_length = int_delta * R[2]; //interferogram length in centimeters 204 double int_length = int_delta * R[2]; //interferogram length in centimeters
204 double fft_delta = 1/int_length; //spectrum spacing (in inverse centimeters, wavenumber 205 double fft_delta = 1/int_length; //spectrum spacing (in inverse centimeters, wavenumber
205 206
206 - size_t start_i = std::ceil(band_min / fft_delta); //calculate the first band to store  
207 - size_t size_i = std::floor(band_max / fft_delta) - start_i; //calculate the number of bands to store 207 + size_t start_i = (size_t)std::ceil(band_min / fft_delta); //calculate the first band to store
  208 + size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i; //calculate the number of bands to store
208 size_t end_i = start_i + size_i; //last band number 209 size_t end_i = start_i + size_i; //last band number
209 agilent_binary<T> result(R[0], R[1], size_i); 210 agilent_binary<T> result(R[0], R[1], size_i);
210 result.Z[0] = start_i * fft_delta; //set the range for the FFT result 211 result.Z[0] = start_i * fft_delta; //set the range for the FFT result
@@ -1309,6 +1309,66 @@ public: @@ -1309,6 +1309,66 @@ public:
1309 1309
1310 } 1310 }
1311 1311
  1312 + bool multiply(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1313 + unsigned long long B = Z(); //calculate the number of bands
  1314 + unsigned long long ZX = Z() * X();
  1315 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1316 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1317 + unsigned long long L = ZX * sizeof(T);
  1318 +
  1319 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1320 +
  1321 + T * c; //pointer to the current ZX slice
  1322 + c = (T*)malloc( L ); //allocate space for the slice
  1323 +
  1324 + for(unsigned long long j = 0; j < Y(); j++){ //for each line
  1325 + read_plane_y(c, j); //load the line into memory
  1326 + for(unsigned long long i = 0; i < B; i++){ //for each band
  1327 + for(unsigned long long m = 0; m < X(); m++){ //for each sample
  1328 + if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked
  1329 + c[m + i * X()] *= (T)v;
  1330 + }
  1331 + }
  1332 + target.write(reinterpret_cast<const char*>(c), L); //write normalized data into destination
  1333 +
  1334 + if(PROGRESS) progress = (double)(j+1) / Y() * 100; //update the progress
  1335 + }
  1336 +
  1337 + free(c); //free the slice memory
  1338 + target.close(); //close the output file
  1339 + return true;
  1340 + }
  1341 +
  1342 + bool add(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1343 + unsigned long long B = Z(); //calculate the number of bands
  1344 + unsigned long long ZX = Z() * X();
  1345 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1346 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1347 + unsigned long long L = ZX * sizeof(T);
  1348 +
  1349 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1350 +
  1351 + T * c; //pointer to the current ZX slice
  1352 + c = (T*)malloc( L ); //allocate space for the slice
  1353 +
  1354 + for(unsigned long long j = 0; j < Y(); j++){ //for each line
  1355 + read_plane_y(c, j); //load the line into memory
  1356 + for(unsigned long long i = 0; i < B; i++){ //for each band
  1357 + for(unsigned long long m = 0; m < X(); m++){ //for each sample
  1358 + if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked
  1359 + c[m + i * X()] += (T)v;
  1360 + }
  1361 + }
  1362 + target.write(reinterpret_cast<const char*>(c), L); //write normalized data into destination
  1363 +
  1364 + if(PROGRESS) progress = (double)(j+1) / Y() * 100; //update the progress
  1365 + }
  1366 +
  1367 + free(c); //free the slice memory
  1368 + target.close(); //close the output file
  1369 + return true;
  1370 + }
  1371 +
1312 /// Close the file. 1372 /// Close the file.
1313 bool close(){ 1373 bool close(){
1314 file.close(); 1374 file.close();
stim/envi/binary.h
@@ -7,6 +7,13 @@ @@ -7,6 +7,13 @@
7 #include "../math/vector.h" 7 #include "../math/vector.h"
8 #include <fstream> 8 #include <fstream>
9 #include <sys/stat.h> 9 #include <sys/stat.h>
  10 +#include <cstring>
  11 +
  12 +#ifdef _WIN32
  13 +#include <Windows.h>
  14 +#else
  15 +#include <unistd.h>
  16 +#endif
10 17
11 namespace stim{ 18 namespace stim{
12 19
@@ -30,14 +37,16 @@ protected: @@ -30,14 +37,16 @@ protected:
30 37
31 double progress; //stores the progress on the current operation (accessible using a thread) 38 double progress; //stores the progress on the current operation (accessible using a thread)
32 39
  40 + size_t buffer_size; //available memory for processing large files
33 41
34 /// Private initialization function used to set default parameters in the data structure. 42 /// Private initialization function used to set default parameters in the data structure.
35 void init(){ 43 void init(){
36 - memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero  
37 - header = 0; //initialize the header size to zero 44 + std::memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero
  45 + header = 0; //initialize the header size to zero
38 mask = NULL; 46 mask = NULL;
39 47
40 progress = 0; 48 progress = 0;
  49 + set_buffer(); //set the maximum buffer size to the default
41 } 50 }
42 51
43 /// Private helper function that returns the size of the file on disk using system functions. 52 /// Private helper function that returns the size of the file on disk using system functions.
@@ -105,6 +114,11 @@ protected: @@ -105,6 +114,11 @@ protected:
105 114
106 public: 115 public:
107 116
  117 + //default constructor
  118 + binary(){
  119 + init();
  120 + }
  121 +
108 double get_progress(){ 122 double get_progress(){
109 return progress; 123 return progress;
110 } 124 }
@@ -113,6 +127,20 @@ public: @@ -113,6 +127,20 @@ public:
113 progress = 0; 127 progress = 0;
114 } 128 }
115 129
  130 + //specify the maximum fraction of available memory that this class will use for buffering
  131 + void set_buffer(double mem_frac = 0.5){ //default to 50%
  132 +#ifdef _WIN32
  133 + MEMORYSTATUSEX statex;
  134 + statex.dwLength = sizeof (statex);
  135 + GlobalMemoryStatusEx (&statex);
  136 + buffer_size = (size_t)(statex.ullAvailPhys * mem_frac);
  137 +#else
  138 + size_t pages = sysconf(_SC_PHYS_PAGES);
  139 + size_t page_size = sysconf(_SC_PAGE_SIZE);
  140 + buffer_size = (size_t)(pages * page_size * mem_frac);
  141 +#endif
  142 + }
  143 +
116 /// Open a binary file for streaming. 144 /// Open a binary file for streaming.
117 145
118 /// @param filename is the name of the binary file 146 /// @param filename is the name of the binary file
@@ -375,6 +403,96 @@ public: @@ -375,6 +403,96 @@ public:
375 return read_pixel(p, i); 403 return read_pixel(p, i);
376 } 404 }
377 405
  406 + /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads
  407 + bool read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
  408 +
  409 + size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes
  410 +
  411 + size_t start = z * R[0] * R[1] + y * R[0] + x; //calculate the start postion
  412 + size_t start_bytes = start * sizeof(T); //start position in bytes
  413 + file.seekg(start * sizeof(T), std::ios::beg); //seek to the start position
  414 +
  415 +
  416 + if(sx == R[0] && sy == R[1]){ //if sx and sy result in a contiguous volume along z
  417 + file.read((char*)dest, size_bytes); //read the block in one pass
  418 + return true;
  419 + }
  420 +
  421 + if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass
  422 + size_t jump_bytes = (R[1] - sy) * R[0] * sizeof(T); //jump between each slice
  423 + size_t slice_bytes = sx * sy * sizeof(T); //size of the slice to be read
  424 + for(size_t zi = 0; zi < sz; zi++){ //for each z-axis slice
  425 + file.read((char*)dest, slice_bytes); //read the slice
  426 + dest += sx * sy; //move the destination pointer to the next slice
  427 + file.seekg(jump_bytes, std::ios::cur); //skip to the next slice in the file
  428 + }
  429 + return true;
  430 + }
  431 +
  432 + //in this case, x is not contiguous so the volume must be read line-by-line
  433 + size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction
  434 + size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices
  435 + size_t line_bytes = sx * sizeof(T); //size of the line to be read
  436 + size_t zi, yi;
  437 + for(zi = 0; zi < sz; zi++){ //for each slice
  438 + file.read((char*)dest, line_bytes); //read the first line
  439 + for(yi = 1; yi < sy; yi++){ //read each additional line
  440 + dest += sx; //move the pointer in the destination block to the next line
  441 + file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file
  442 + file.read((char*)dest, line_bytes); //read the line to the destination block
  443 + }
  444 + file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice
  445 + }
  446 + return false;
  447 + }
  448 +
  449 + // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2])
  450 +
  451 + void permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){
  452 + size_t d[3] = {d0, d1, d2};
  453 + size_t s[3] = {sx, sy, sz};
  454 + size_t p[3];// = {x, y, z};
  455 +
  456 + if(d[0] == 0 && d[1] == 1 && d[2] == 2){
  457 + //this isn't actually a permute - just copy the data
  458 + memcpy(dest, src, sizeof(T) * sx * sy * sz);
  459 + }
  460 + else if(d[0] == 0){ //the individual lines are contiguous, so you can memcpy line-by-line
  461 + size_t y, z;
  462 + size_t src_idx, dest_idx;
  463 + size_t x_bytes = sizeof(T) * sx;
  464 + for(z = 0; z < sz; z++){
  465 + p[2] = z;
  466 + for(y = 0; y < sy; y++){
  467 + p[1] = y;
  468 + src_idx = z * sx * sy + y * sx;
  469 + dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]];
  470 + //std::cout<<z<<", "<<y<<" ------- "<<p[d[2]]<<" * "<<s[d[0]]<<" * "<<s[d[1]]<<" + "<<p[d[1]]<<" * "<<s[d[0]]<<std::endl;
  471 + memcpy(dest + dest_idx, src + src_idx, x_bytes);
  472 + }
  473 + }
  474 + }
  475 + else{ //loop through every damn point
  476 + size_t x, y, z;
  477 + size_t src_idx, dest_idx;
  478 + size_t src_z, src_y;
  479 + for(z = 0; z < sz; z++){
  480 + p[2] = z;
  481 + src_z = z * sx * sy;
  482 + for(y = 0; y < sy; y++){
  483 + p[1] = y;
  484 + src_y = src_z + y * sx;
  485 + for(x = 0; x < sx; x++){
  486 + p[0] = x;
  487 + src_idx = src_y + x;
  488 + dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]] + p[d[0]];
  489 + dest[dest_idx] = src[src_idx];
  490 + }
  491 + }
  492 + }
  493 + }
  494 + }
  495 +
378 }; 496 };
379 497
380 } 498 }
@@ -373,7 +373,7 @@ public: @@ -373,7 +373,7 @@ public:
373 for(size_t xy = 0; xy < XY; xy++){ //for each pixel 373 for(size_t xy = 0; xy < XY; xy++){ //for each pixel
374 memset(spec, 0, Bb); //set the spectrum to zero 374 memset(spec, 0, Bb); //set the spectrum to zero
375 if(mask == NULL || mask[xy]){ //if the pixel is masked 375 if(mask == NULL || mask[xy]){ //if the pixel is masked
376 - len = 0; //initialize the 376 + len = 0; //initialize the
377 file.read((char*)spec, Bb); //read a spectrum 377 file.read((char*)spec, Bb); //read a spectrum
378 for(size_t b = 0; b < B; b++) //for each band 378 for(size_t b = 0; b < B; b++) //for each band
379 len += spec[b]*spec[b]; //add the square of the spectral band 379 len += spec[b]*spec[b]; //add the square of the spectral band
@@ -385,7 +385,7 @@ public: @@ -385,7 +385,7 @@ public:
385 file.seekg(Bb, std::ios::cur); //otherwise skip a spectrum 385 file.seekg(Bb, std::ios::cur); //otherwise skip a spectrum
386 target.write((char*)spec, Bb); //output the normalized spectrum 386 target.write((char*)spec, Bb); //output the normalized spectrum
387 if(PROGRESS) progress = (double)(xy + 1) / (double)XY * 100; //update the progress 387 if(PROGRESS) progress = (double)(xy + 1) / (double)XY * 100; //update the progress
388 - } 388 + }
389 } 389 }
390 390
391 391
@@ -1088,6 +1088,232 @@ public: @@ -1088,6 +1088,232 @@ public:
1088 return true; 1088 return true;
1089 } 1089 }
1090 1090
  1091 +
  1092 +#ifdef CUDA_FOUND
  1093 + /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS
  1094 + /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra
  1095 + bool coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
  1096 +
  1097 + cudaError_t cudaStat;
  1098 + cublasStatus_t stat;
  1099 + cublasHandle_t handle;
  1100 +
  1101 + progress = 0; //initialize the progress to zero (0)
  1102 + unsigned long long XY = X() * Y(); //calculate the number of elements in a band image
  1103 + unsigned long long B = Z(); //calculate the number of spectral elements
  1104 +
  1105 + double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file
  1106 + double* s_dev; //declare a device pointer that will store the spectrum on the GPU
  1107 +
  1108 + double* s2_dev; // device pointer on the GPU
  1109 + cudaStat = cudaMalloc(&s2_dev, B * sizeof(double)); // allocate space on the CUDA device
  1110 + cudaStat = cudaMemset(s2_dev, 0, B * sizeof(double)); // initialize s2_dev to zero (0)
  1111 +
  1112 + double* A_dev; //declare a device pointer that will store the covariance matrix on the GPU
  1113 + double* avg_dev; //declare a device pointer that will store the average spectrum
  1114 + cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum
  1115 + cudaStat = cudaMalloc(&A_dev, B * B * sizeof(double)); //allocate space on the CUDA device for the covariance matrix
  1116 + cudaStat = cudaMemset(A_dev, 0, B * B * sizeof(double)); //initialize the covariance matrix to zero (0)
  1117 + cudaStat = cudaMalloc(&avg_dev, B * sizeof(double)); //allocate space on the CUDA device for the average spectrum
  1118 + stat = cublasSetVector((int)B, sizeof(double), avg, 1, avg_dev, 1); //copy the average spectrum to the CUDA device
  1119 +
  1120 + double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product)
  1121 + double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction)
  1122 +
  1123 + stat = cublasCreate(&handle); //create a cuBLAS instance
  1124 + if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid
  1125 + printf ("CUBLAS initialization failed\n");
  1126 + return EXIT_FAILURE;
  1127 + }
  1128 + for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel
  1129 + if (mask == NULL || mask[xy] != 0){
  1130 + pixeld(s, xy); //retreive the spectrum at the current xy pixel location
  1131 +
  1132 + stat = cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1); //copy the spectrum from the host to the device
  1133 + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, avg_dev, 1, s_dev, 1); //subtract the average spectrum
  1134 +
  1135 + cudaMemcpy(s2_dev, s_dev + 1 , (B-1) * sizeof(double), cudaMemcpyDeviceToDevice); //copy B-1 elements from shifted source data (s_dev) to device pointer (s2_dev )
  1136 + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, s2_dev, 1, s_dev, 1); //Minimum/Maximum Autocorrelation Factors (MAF) method : subtranct each pixel from adjacent pixel (z direction is choosed to do so , which is almost the same as x or y direction or even average of them )
  1137 +
  1138 +
  1139 + stat = cublasDsyr(handle, CUBLAS_FILL_MODE_UPPER, (int)B, &ger_alpha, s_dev, 1, A_dev, (int)B); //calculate the covariance matrix (symmetric outer product)
  1140 + }
  1141 + if(PROGRESS) progress = (double)(xy+1) / XY * 100; //record the current progress
  1142 +
  1143 + }
  1144 +
  1145 + cublasGetMatrix((int)B, (int)B, sizeof(double), A_dev, (int)B, coN, (int)B); //copy the result from the GPU to the CPU
  1146 +
  1147 + cudaFree(A_dev); //clean up allocated device memory
  1148 + cudaFree(s_dev);
  1149 + cudaFree(s2_dev);
  1150 + cudaFree(avg_dev);
  1151 +
  1152 + for(unsigned long long i = 0; i < B; i++){ //copy the upper triangular portion to the lower triangular portion
  1153 + for(unsigned long long j = i+1; j < B; j++){
  1154 + coN[B * i + j] = coN[B * j + i];
  1155 + }
  1156 + }
  1157 +
  1158 + return true;
  1159 + }
  1160 +#endif
  1161 +
  1162 + /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision.
  1163 +
  1164 + /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
  1165 + /// @param avg is a pointer to memory of size B that stores the average spectrum
  1166 + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
  1167 + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
  1168 +
  1169 +#ifdef CUDA_FOUND
  1170 + int dev_count;
  1171 + cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1172 + cudaDeviceProp prop;
  1173 + cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1174 + if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
  1175 + return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1176 +#endif
  1177 +
  1178 +
  1179 +
  1180 + progress = 0;
  1181 + //memory allocation
  1182 + unsigned long long XY = X() * Y();
  1183 + unsigned long long B = Z();
  1184 + T* temp = (T*)malloc(sizeof(T) * B);
  1185 +
  1186 + unsigned long long count = nnz(mask); //count the number of masked pixels
  1187 +
  1188 + //initialize covariance matrix of noise
  1189 + memset(coN, 0, B * B * sizeof(double));
  1190 +
  1191 + //calculate covariance matrix
  1192 + double* coN_half = (double*) malloc(B * B * sizeof(double)); //allocate space for a higher-precision intermediate matrix
  1193 + double* temp_precise = (double*) malloc(B * sizeof(double));
  1194 + memset(coN_half, 0, B * B * sizeof(double)); //initialize the high-precision matrix with zeros
  1195 + unsigned long long idx; //stores i*B to speed indexing
  1196 + for (unsigned long long xy = 0; xy < XY; xy++){
  1197 + if (mask == NULL || mask[xy] != 0){
  1198 + pixel(temp, xy); //retreive the spectrum at the current xy pixel location
  1199 + for(unsigned long long b = 0; b < B; b++) //subtract the mean from this spectrum and increase the precision
  1200 + temp_precise[b] = (double)temp[b] - (double)avg[b];
  1201 +
  1202 + for(unsigned long long b2 = 0; b2 < B-1; b2++) //Minimum/Maximum Autocorrelation Factors (MAF) method : subtranct each pixel from adjacent pixel (z direction is choosed to do so , which is almost the same as x or y direction or even average of them )
  1203 + temp_precise[b2] -= temp_precise[b2+1];
  1204 +
  1205 + idx = 0;
  1206 + for (unsigned long long b0 = 0; b0 < B; b0++){ //for each band
  1207 + for (unsigned long long b1 = b0; b1 < B; b1++)
  1208 + coN_half[idx++] += temp_precise[b0] * temp_precise[b1];
  1209 + }
  1210 + }
  1211 + if(PROGRESS) progress = (double)(xy+1) / XY * 100;
  1212 + }
  1213 + idx = 0;
  1214 + for (unsigned long long i = 0; i < B; i++){ //copy the precision matrix to both halves of the output matrix
  1215 + for (unsigned long long j = i; j < B; j++){
  1216 + coN[j * B + i] = coN[i * B + j] = coN_half[idx++] / (double) count;
  1217 + }
  1218 + }
  1219 +
  1220 + free(temp);
  1221 + free(temp_precise);
  1222 + return true;
  1223 + }
  1224 +
  1225 + #ifdef CUDA_FOUND
  1226 + /// Project the spectra onto a set of basis functions
  1227 + /// @param outfile is the name of the new binary output file that will be created
  1228 + /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering)
  1229 + /// @param basis a set of basis vectors that the data set will be projected onto (after centering)
  1230 + /// @param M is the number of basis vectors
  1231 + /// @param mask is a character mask used to limit processing to valid pixels
  1232 + bool project_cublas(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){
  1233 +
  1234 + cudaError_t cudaStat;
  1235 + cublasStatus_t stat;
  1236 + cublasHandle_t handle;
  1237 +
  1238 + std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file
  1239 +
  1240 + progress = 0; //initialize the progress to zero (0)
  1241 + unsigned long long XY = X() * Y(); //calculate the number of elements in a band image
  1242 + unsigned long long B = Z(); //calculate the number of spectral elements
  1243 +
  1244 + double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file
  1245 + double* s_dev; //declare a device pointer that will store the spectrum on the GPU
  1246 + cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum
  1247 +
  1248 +
  1249 + double* basis_dev; // device pointer on the GPU
  1250 + cudaStat = cudaMalloc(&basis_dev, M * B * sizeof(double)); // allocate space on the CUDA device
  1251 + cudaStat = cudaMemset(basis_dev, 0, M * B * sizeof(double)); // initialize basis_dev to zero (0)
  1252 +
  1253 +
  1254 + /// transposing basis matrix (because cuBLAS is column-major)
  1255 + double *basis_Transposed = (double*)malloc(M * B * sizeof(double));
  1256 + memset(basis_Transposed, 0, M * B * sizeof(double));
  1257 + for (int i = 0; i<M; i++)
  1258 + for (int j = 0; j<B; j++)
  1259 + basis_Transposed[i+j*M] = basis[i*B+j];
  1260 +
  1261 + stat = cublasSetMatrix((int)M, (int)B, sizeof(double),basis_Transposed, (int)M, basis_dev, (int)M); //copy the basis_Transposed matrix to the CUDA device (both matrices are stored in column-major format)
  1262 +
  1263 + double* center_dev; //declare a device pointer that will store the center (average)
  1264 + cudaStat = cudaMalloc(&center_dev, B * sizeof(double)); //allocate space on the CUDA device for the center (average)
  1265 + stat = cublasSetVector((int)B, sizeof(double), center, 1, center_dev, 1); //copy the center vector (average) to the CUDA device (from host to device)
  1266 +
  1267 +
  1268 + double* A = (double*)malloc(sizeof(double) * M); //allocate space for the projected pixel on the host
  1269 + double* A_dev; //declare a device pointer that will store the projected pixel on the GPU
  1270 + cudaStat = cudaMalloc(&A_dev,M * sizeof(double)); //allocate space on the CUDA device for the projected pixel
  1271 + cudaStat = cudaMemset(A_dev, 0,M * sizeof(double)); //initialize the projected pixel to zero (0)
  1272 +
  1273 + double axpy_alpha = -1; //multiplication factor for the center (in order to perform a subtraction)
  1274 + double axpy_alpha2 = 1; //multiplication factor for the matrix-vector multiplication
  1275 + double axpy_beta = 0; //multiplication factor for the matrix-vector multiplication (there is no second scalor)
  1276 +
  1277 + stat = cublasCreate(&handle); //create a cuBLAS instance
  1278 + if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid
  1279 + printf ("CUBLAS initialization failed\n");
  1280 + return EXIT_FAILURE;
  1281 + }
  1282 +
  1283 + T* temp = (T*)malloc(sizeof(T) * M); //allocate space for the projected pixel to be written on the disc
  1284 + size_t i;
  1285 + for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel
  1286 + if (mask == NULL || mask[xy] != 0){
  1287 + pixeld(s, xy); //retreive the spectrum at the current xy pixel location
  1288 +
  1289 + stat = cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1); //copy the spectrum from the host to the device
  1290 + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, center_dev, 1, s_dev, 1); //subtract the center (average)
  1291 + stat = cublasDgemv(handle,CUBLAS_OP_N,(int)M,(int)B,&axpy_alpha2,basis_dev,(int)M,s_dev,1,&axpy_beta,A_dev,1); //performs the matrix-vector multiplication
  1292 + stat = cublasGetVector((int)B, sizeof(double), A_dev, 1, A, 1); //copy the projected pixel to the host (from GPU to CPU)
  1293 +
  1294 + //std::copy<double*, T*>(A, A + M, temp);
  1295 + for(i = 0; i < M; i++) temp[i] = (T)A[i]; //casting projected pixel from double to whatever T is
  1296 + }
  1297 +
  1298 + target.write(reinterpret_cast<const char*>(temp), sizeof(T) * M); //write the projected vector
  1299 + if(PROGRESS) progress = (double)(xy+1) / XY * 100; //record the current progress
  1300 +
  1301 + }
  1302 +
  1303 + //clean up allocated device memory
  1304 + cudaFree(A_dev);
  1305 + cudaFree(s_dev);
  1306 + cudaFree(basis_dev);
  1307 + cudaFree(center_dev);
  1308 + free(A);
  1309 + free(s);
  1310 + free(temp);
  1311 + target.close(); //close the output file
  1312 +
  1313 + return true;
  1314 + }
  1315 +#endif
  1316 +
1091 /// Project the spectra onto a set of basis functions 1317 /// Project the spectra onto a set of basis functions
1092 /// @param outfile is the name of the new binary output file that will be created 1318 /// @param outfile is the name of the new binary output file that will be created
1093 /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering) 1319 /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering)
@@ -1096,6 +1322,14 @@ public: @@ -1096,6 +1322,14 @@ public:
1096 /// @param mask is a character mask used to limit processing to valid pixels 1322 /// @param mask is a character mask used to limit processing to valid pixels
1097 bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){ 1323 bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){
1098 1324
  1325 +#ifdef CUDA_FOUND
  1326 + int dev_count;
  1327 + cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1328 + cudaDeviceProp prop;
  1329 + cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1330 + if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
  1331 + return project_cublas(outfile,center,basis,M,mask,PROGRESS); //use cuBLAS to calculate the covariance matrix
  1332 +#endif
1099 std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file 1333 std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file
1100 //std::string headername = outfile + ".hdr"; //the header file name 1334 //std::string headername = outfile + ".hdr"; //the header file name
1101 1335
@@ -1125,7 +1359,7 @@ public: @@ -1125,7 +1359,7 @@ public:
1125 free(s); //free temporary storage arrays 1359 free(s); //free temporary storage arrays
1126 free(rs); 1360 free(rs);
1127 target.close(); //close the output file 1361 target.close(); //close the output file
1128 - 1362 +
1129 return true; 1363 return true;
1130 } 1364 }
1131 1365
@@ -1395,6 +1629,52 @@ public: @@ -1395,6 +1629,52 @@ public:
1395 } 1629 }
1396 } 1630 }
1397 1631
  1632 + bool multiply(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1633 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1634 + std::string headername = outname + ".hdr"; //the header file name
  1635 +
  1636 + unsigned long long N = X() * Y(); //calculate the total number of pixels to be processed
  1637 + unsigned long long B = Z(); //get the number of bands
  1638 + T* s = (T*)malloc(sizeof(T) * B); //allocate memory to store a pixel
  1639 + for(unsigned long long n = 0; n < N; n++){ //for each pixel in the image
  1640 + if(mask == NULL || mask[n]){ //if the pixel is masked
  1641 + for(size_t b = 0; b < B; b++) //for each band in the spectrum
  1642 + s[b] *= (T)v; //multiply
  1643 + }
  1644 +
  1645 + if(PROGRESS) progress = (double)(n+1) / N * 100; //set the current progress
  1646 +
  1647 + target.write((char*)s, sizeof(T) * B); //write the corrected data into destination
  1648 + } //end for each pixel
  1649 +
  1650 + free(s); //free the spectrum
  1651 + target.close(); //close the output file
  1652 + return true;
  1653 + }
  1654 +
  1655 + bool add(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1656 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1657 + std::string headername = outname + ".hdr"; //the header file name
  1658 +
  1659 + unsigned long long N = X() * Y(); //calculate the total number of pixels to be processed
  1660 + unsigned long long B = Z(); //get the number of bands
  1661 + T* s = (T*)malloc(sizeof(T) * B); //allocate memory to store a pixel
  1662 + for(unsigned long long n = 0; n < N; n++){ //for each pixel in the image
  1663 + if(mask == NULL || mask[n]){ //if the pixel is masked
  1664 + for(size_t b = 0; b < B; b++) //for each band in the spectrum
  1665 + s[b] += (T)v; //multiply
  1666 + }
  1667 +
  1668 + if(PROGRESS) progress = (double)(n+1) / N * 100; //set the current progress
  1669 +
  1670 + target.write((char*)s, sizeof(T) * B); //write the corrected data into destination
  1671 + } //end for each pixel
  1672 +
  1673 + free(s); //free the spectrum
  1674 + target.close(); //close the output file
  1675 + return true;
  1676 + }
  1677 +
1398 1678
1399 1679
1400 /// Close the file. 1680 /// Close the file.
@@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
9 #include <vector> 9 #include <vector>
10 #include <deque> 10 #include <deque>
11 #include <chrono> 11 #include <chrono>
  12 +#include <future>
12 13
13 14
14 15
@@ -376,36 +377,144 @@ public: @@ -376,36 +377,144 @@ public:
376 377
377 } 378 }
378 379
379 - /// Convert the current BSQ file to a BIL file with the specified file name.  
380 -  
381 - /// @param outname is the name of the output BIL file to be saved to disk.  
382 - bool bil(std::string outname, bool PROGRESS = false)  
383 - {  
384 - //simplify image resolution  
385 - unsigned long long jump = (Y() - 1) * X() * sizeof(T); 380 + void readlines(T* dest, size_t start, size_t n){
  381 + hsi<T>::read(dest, 0, start, 0, X(), n, Z());
  382 + }
386 383
387 - std::ofstream target(outname.c_str(), std::ios::binary);  
388 - std::string headername = outname + ".hdr"; 384 + /// Convert this BSQ file to a BIL
  385 + bool bil(std::string outname, bool PROGRESS = false){
389 386
390 - unsigned long long L = X();  
391 - T* line = (T*)malloc(sizeof(T) * L); 387 + const size_t buffers = 4; //number of buffers required for this algorithm
  388 + size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
392 389
393 - for ( unsigned long long y = 0; y < Y(); y++) //for each y position  
394 - {  
395 - file.seekg(y * X() * sizeof(T), std::ios::beg); //seek to the beginning of the xz slice  
396 - for ( unsigned long long z = 0; z < Z(); z++ ) //for each band  
397 - {  
398 - file.read((char *)line, sizeof(T) * X()); //read a line  
399 - target.write((char*)line, sizeof(T) * X()); //write the line to the output file  
400 - file.seekg(jump, std::ios::cur); //seek to the next band  
401 - if(PROGRESS) progress = (double)((y+1) * Z() + z + 1) / (Z() * Y()) * 100; //update the progress counter 390 + size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
  391 + size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  392 + if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
  393 + std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
  394 + exit(1);
  395 + }
  396 + size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
  397 +
  398 + T* src[2]; //source double-buffer for asynchronous batching
  399 + src[0] = (T*) malloc(max_batch_bytes);
  400 + src[1] = (T*) malloc(max_batch_bytes);
  401 + T* dst[2]; //destination double-buffer for asynchronous batching
  402 + dst[0] = (T*) malloc(max_batch_bytes);
  403 + dst[1] = (T*) malloc(max_batch_bytes);
  404 +
  405 + size_t N[2]; //number of slices stored in buffers 0 and 1
  406 + N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
  407 +
  408 + std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
  409 + //initialize with buffer 0 (used for double buffering)
  410 + size_t y_load = 0;
  411 + size_t y_proc = 0;
  412 + std::future<void> rthread;
  413 + std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
  414 +
  415 + readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  416 + y_load += N[0]; //increment the loaded slice counter
  417 + int b = 1;
  418 +
  419 + std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers
  420 + std::chrono::high_resolution_clock::time_point t_end;
  421 + size_t t_batch; //number of milliseconds to process a batch
  422 + size_t t_total = 0;
  423 + while(y_proc < Y()){ //while there are still slices to be processed
  424 + t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
  425 + if(y_load < Y()){ //if there are still slices to be loaded, load them
  426 + if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
  427 + rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
  428 +
  429 + y_load += N[b]; //increment the number of loaded slices
402 } 430 }
  431 +
  432 + b = !b; //swap the double-buffer
  433 +
  434 + binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
  435 + target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
  436 + y_proc += N[b]; //increment the counter of processed pixels
  437 + if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  438 + t_end = std::chrono::high_resolution_clock::now();
  439 + t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
  440 + t_total += t_batch;
  441 + rthread.wait();
403 } 442 }
404 443
405 - free(line);  
406 - target.close(); 444 + std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl;
  445 + free(src[0]); //free buffer resources
  446 + free(src[1]);
  447 + free(dst[0]);
  448 + free(dst[1]);
  449 + return true; //return true
  450 + }
407 451
408 - return true; 452 + /// Convert this BSQ file to a BIP
  453 + bool bip(std::string outname, bool PROGRESS = false){
  454 +
  455 + const size_t buffers = 4; //number of buffers required for this algorithm
  456 + size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
  457 +
  458 + size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
  459 + size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  460 + if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
  461 + std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
  462 + exit(1);
  463 + }
  464 + size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
  465 +
  466 + T* src[2]; //source double-buffer for asynchronous batching
  467 + src[0] = (T*) malloc(max_batch_bytes);
  468 + src[1] = (T*) malloc(max_batch_bytes);
  469 + T* dst[2]; //destination double-buffer for asynchronous batching
  470 + dst[0] = (T*) malloc(max_batch_bytes);
  471 + dst[1] = (T*) malloc(max_batch_bytes);
  472 +
  473 + size_t N[2]; //number of slices stored in buffers 0 and 1
  474 + N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
  475 +
  476 + std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
  477 + //initialize with buffer 0 (used for double buffering)
  478 + size_t y_load = 0;
  479 + size_t y_proc = 0;
  480 + std::future<void> rthread;
  481 + std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
  482 +
  483 + readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  484 + y_load += N[0]; //increment the loaded slice counter
  485 + int b = 1;
  486 +
  487 + std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers
  488 + std::chrono::high_resolution_clock::time_point t_end;
  489 + size_t t_batch; //number of milliseconds to process a batch
  490 + size_t t_total = 0;
  491 + while(y_proc < Y()){ //while there are still slices to be processed
  492 + t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
  493 + if(y_load < Y()){ //if there are still slices to be loaded, load them
  494 + if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
  495 + rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
  496 +
  497 + y_load += N[b]; //increment the number of loaded slices
  498 + }
  499 +
  500 + b = !b; //swap the double-buffer
  501 +
  502 + binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file
  503 + target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
  504 + y_proc += N[b]; //increment the counter of processed pixels
  505 + if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  506 + t_end = std::chrono::high_resolution_clock::now();
  507 + t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
  508 + t_total += t_batch;
  509 + rthread.wait();
  510 + }
  511 +
  512 + std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl;
  513 + free(src[0]); //free buffer resources
  514 + free(src[1]);
  515 + free(dst[0]);
  516 + free(dst[1]);
  517 + return true; //return true
409 } 518 }
410 519
411 /// Return a baseline corrected band given two adjacent baseline points and their bands. The result is stored in a pre-allocated array. 520 /// Return a baseline corrected band given two adjacent baseline points and their bands. The result is stored in a pre-allocated array.
@@ -1238,6 +1347,60 @@ public: @@ -1238,6 +1347,60 @@ public:
1238 if(PROGRESS) progress = (double)(b+1) / (double)B * 100; 1347 if(PROGRESS) progress = (double)(b+1) / (double)B * 100;
1239 } 1348 }
1240 1349
  1350 + } //end deriv
  1351 +
  1352 + bool multiply(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1353 + unsigned long long B = Z(); //calculate the number of bands
  1354 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1355 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1356 +
  1357 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1358 + std::string headername = outname + ".hdr"; //the header file name
  1359 +
  1360 + T * c; //pointer to the current image
  1361 + c = (T*)malloc( S ); //allocate memory for the band image
  1362 +
  1363 + for(unsigned long long j = 0; j < B; j++){ //for each band
  1364 + band_index(c, j); //load the current band
  1365 + for(unsigned long long i = 0; i < XY; i++){ //for each pixel
  1366 + if(mask == NULL || mask[i]) //if the pixel is masked
  1367 + c[i] *= (T)v; //perform the multiplication
  1368 + }
  1369 + target.write(reinterpret_cast<const char*>(c), S); //write normalized data into destination
  1370 +
  1371 + if(PROGRESS) progress = (double)(j+1) / B * 100; //update the progress
  1372 + }
  1373 +
  1374 + free(c); //free the band
  1375 + target.close(); //close the output file
  1376 + return true;
  1377 + }
  1378 +
  1379 + bool add(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1380 + unsigned long long B = Z(); //calculate the number of bands
  1381 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1382 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1383 +
  1384 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1385 + std::string headername = outname + ".hdr"; //the header file name
  1386 +
  1387 + T * c; //pointer to the current image
  1388 + c = (T*)malloc( S ); //allocate memory for the band image
  1389 +
  1390 + for(unsigned long long j = 0; j < B; j++){ //for each band
  1391 + band_index(c, j); //load the current band
  1392 + for(unsigned long long i = 0; i < XY; i++){ //for each pixel
  1393 + if(mask == NULL || mask[i]) //if the pixel is masked
  1394 + c[i] += (T)v; //perform the multiplication
  1395 + }
  1396 + target.write(reinterpret_cast<const char*>(c), S); //write normalized data into destination
  1397 +
  1398 + if(PROGRESS) progress = (double)(j+1) / B * 100; //update the progress
  1399 + }
  1400 +
  1401 + free(c); //free the band
  1402 + target.close(); //close the output file
  1403 + return true;
1241 } 1404 }
1242 1405
1243 1406
@@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
7 #include "../envi/bil.h" 7 #include "../envi/bil.h"
8 #include "../math/fd_coefficients.h" 8 #include "../math/fd_coefficients.h"
9 #include <iostream> 9 #include <iostream>
  10 +#include <fstream>
10 //#include "../image/image.h" 11 //#include "../image/image.h"
11 12
12 namespace stim{ 13 namespace stim{
@@ -58,15 +59,17 @@ class envi{ @@ -58,15 +59,17 @@ class envi{
58 for(size_t i = 0; i < len; i++) 59 for(size_t i = 0; i < len; i++)
59 cast(&dst[i], &src[i]); 60 cast(&dst[i], &src[i]);
60 } 61 }
61 - 62 +
62 public: 63 public:
  64 + envi_header header;
63 65
  66 +
64 /// Default constructor 67 /// Default constructor
65 envi(){ 68 envi(){
66 file = NULL; //set the file pointer to NULL 69 file = NULL; //set the file pointer to NULL
67 } 70 }
68 71
69 - envi_header header; 72 +
70 73
71 void* malloc_spectrum(){ 74 void* malloc_spectrum(){
72 return alloc_array(header.bands); 75 return alloc_array(header.bands);
@@ -76,6 +79,40 @@ public: @@ -76,6 +79,40 @@ public:
76 return alloc_array(header.samples * header.lines); 79 return alloc_array(header.samples * header.lines);
77 } 80 }
78 81
  82 + void set_buffer(double memfrac = 0.5){
  83 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  84 + if(header.data_type ==envi_header::float32)
  85 + ((bsq<float>*)file)->set_buffer(memfrac);
  86 + else if(header.data_type == envi_header::float64)
  87 + ((bsq<double>*)file)->set_buffer(memfrac);
  88 + else
  89 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  90 + }
  91 +
  92 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  93 + if(header.data_type ==envi_header::float32)
  94 + ((bil<float>*)file)->set_buffer(memfrac);
  95 + else if(header.data_type == envi_header::float64)
  96 + ((bil<double>*)file)->set_buffer(memfrac);
  97 + else
  98 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  99 + }
  100 +
  101 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  102 + if(header.data_type ==envi_header::float32)
  103 + ((bip<float>*)file)->set_buffer(memfrac);
  104 + else if(header.data_type == envi_header::float64)
  105 + ((bip<double>*)file)->set_buffer(memfrac);
  106 + else
  107 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  108 + }
  109 +
  110 + else{
  111 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  112 + exit(1);
  113 + }
  114 + }
  115 +
79 /// Returns the size of the data type in bytes 116 /// Returns the size of the data type in bytes
80 unsigned int type_size(){ 117 unsigned int type_size(){
81 if(header.data_type == envi_header::float32) return 4; 118 if(header.data_type == envi_header::float32) return 4;
@@ -224,6 +261,37 @@ public: @@ -224,6 +261,37 @@ public:
224 261
225 } 262 }
226 263
  264 + /// Open an Agilent binary file as an ENVI stream
  265 + bool open_agilent(std::string filename){
  266 + fname = filename; //store the file name
  267 +
  268 + //Open the file temporarily to get the header information
  269 + FILE* f = fopen(filename.c_str(), "r"); //open the binary file for reading
  270 + if(f == NULL) return false; //return false if no file is opened
  271 +
  272 + fseek(f, 9, SEEK_SET); //seek to the number of bands
  273 + short b; //allocate space for the number of bands
  274 + fread(&b, sizeof(short), 1, f); //read the number of bands
  275 + fseek(f, 13, SEEK_CUR); //skip the the x and y dimensions
  276 + short x, y;
  277 + fread(&x, sizeof(short), 1, f); //read the image x and y size
  278 + fread(&y, sizeof(short), 1, f);
  279 + fclose(f); //close the file
  280 +
  281 + //store the information from the Agilent header in the ENVI header
  282 + header.bands = b;
  283 + header.samples = x;
  284 + header.lines = y;
  285 + header.data_type = envi_header::float32; //all values are 32-bit floats
  286 + header.header_offset = 1020; //number of bytes in an Agilent binary header
  287 + header.interleave = envi_header::BSQ; //all Agilent binary files are BSQ
  288 +
  289 + allocate(); //allocate the streaming file object
  290 + open(); //open the file for streaming
  291 +
  292 + return true;
  293 + }
  294 +
227 /// Open an existing ENVI file given the filename and a header structure 295 /// Open an existing ENVI file given the filename and a header structure
228 296
229 /// @param filename is the name of the ENVI binary file 297 /// @param filename is the name of the ENVI binary file
@@ -257,7 +325,6 @@ public: @@ -257,7 +325,6 @@ public:
257 //header.load(headername); 325 //header.load(headername);
258 326
259 return open(filename, h); 327 return open(filename, h);
260 -  
261 } 328 }
262 329
263 /// Normalize a hyperspectral ENVI file given a band number and threshold. 330 /// Normalize a hyperspectral ENVI file given a band number and threshold.
@@ -454,9 +521,9 @@ public: @@ -454,9 +521,9 @@ public:
454 else if(interleave == envi_header::BIL) //convert BSQ -> BIL 521 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
455 ((bsq<float>*)file)->bil(outfile, PROGRESS); 522 ((bsq<float>*)file)->bil(outfile, PROGRESS);
456 else if(interleave == envi_header::BIP){ //ERROR 523 else if(interleave == envi_header::BIP){ //ERROR
457 - std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;  
458 - //return ((bsq<float>*)file)->bip(outfile, PROGRESS);  
459 - exit(1); 524 + //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
  525 + ((bsq<float>*)file)->bip(outfile, PROGRESS);
  526 + //exit(1);
460 } 527 }
461 } 528 }
462 529
@@ -468,9 +535,9 @@ public: @@ -468,9 +535,9 @@ public:
468 else if(interleave == envi_header::BIL) //convert BSQ -> BIL 535 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
469 ((bsq<double>*)file)->bil(outfile, PROGRESS); 536 ((bsq<double>*)file)->bil(outfile, PROGRESS);
470 else if(interleave == envi_header::BIP){ //ERROR 537 else if(interleave == envi_header::BIP){ //ERROR
471 - std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;  
472 - //return ((bsq<float>*)file)->bip(outfile, PROGRESS);  
473 - exit(1); 538 + //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
  539 + ((bsq<float>*)file)->bip(outfile, PROGRESS);
  540 + //exit(1);
474 } 541 }
475 } 542 }
476 543
@@ -1106,46 +1173,6 @@ public: @@ -1106,46 +1173,6 @@ public:
1106 return false; 1173 return false;
1107 } 1174 }
1108 1175
1109 - /// Retrieve a spectrum from the specified location  
1110 -  
1111 - /// @param ptr is a pointer to pre-allocated memory of size B*sizeof(T)  
1112 - /// @param x is the x-coordinate of the spectrum  
1113 - /// @param y is the y-coordinate of the spectrum  
1114 - /*bool spectrum(void* ptr, unsigned long long x, unsigned long long y, bool PROGRESS = false){  
1115 -  
1116 - if(header.interleave == envi_header::BSQ){ //if the infile is bsq file  
1117 - if(header.data_type ==envi_header::float32)  
1118 - return ((bsq<float>*)file)->spectrum((float*)ptr, x, y, PROGRESS);  
1119 - else if (header.data_type == envi_header::float64)  
1120 - return ((bsq<double>*)file)->spectrum((double*)ptr, x, y, PROGRESS);  
1121 - else{  
1122 - std::cout << "ERROR: unidentified data type" << std::endl;  
1123 - exit(1);  
1124 - }  
1125 - }  
1126 - else if (header.interleave == envi_header::BIL){  
1127 - if (header.data_type == envi_header::float32)  
1128 - return ((bil<float>*)file)->spectrum((float*)ptr, x, y, PROGRESS);  
1129 - else if (header.data_type == envi_header::float64)  
1130 - return ((bil<double>*)file)->spectrum((double*)ptr, x, y, PROGRESS);  
1131 - else{  
1132 - std::cout << "ERROR: unidentified data type" << std::endl;  
1133 - exit(1);  
1134 - }  
1135 - }  
1136 - else if (header.interleave == envi_header::BIP){  
1137 - if (header.data_type == envi_header::float32)  
1138 - return ((bip<float>*)file)->spectrum((float*)ptr, x, y, PROGRESS);  
1139 - else if (header.data_type == envi_header::float64)  
1140 - return ((bip<double>*)file)->spectrum((double*)ptr, x, y, PROGRESS);  
1141 - else{  
1142 - std::cout << "ERROR: unidentified data type" << std::endl;  
1143 - exit(1);  
1144 - }  
1145 - }  
1146 - return false;  
1147 - }*/  
1148 -  
1149 // Retrieve a spectrum at the specified 1D location 1176 // Retrieve a spectrum at the specified 1D location
1150 1177
1151 /// @param ptr is a pointer to pre-allocated memory of size B*sizeof(T) 1178 /// @param ptr is a pointer to pre-allocated memory of size B*sizeof(T)
@@ -1209,50 +1236,6 @@ public: @@ -1209,50 +1236,6 @@ public:
1209 void spectrum(T* ptr, size_t x, size_t y, bool PROGRESS = false){ 1236 void spectrum(T* ptr, size_t x, size_t y, bool PROGRESS = false){
1210 1237
1211 spectrum<T>(ptr, y * header.samples + x, PROGRESS); 1238 spectrum<T>(ptr, y * header.samples + x, PROGRESS);
1212 - /*void* temp = alloc_array<T>(header.bands); //allocate space for the output array  
1213 -  
1214 - if(header.interleave == envi_header::BSQ){ //if the infile is bsq file  
1215 - if(header.data_type ==envi_header::float32){  
1216 - ((bsq<float>*)file)->spectrum((float*)temp, x, y, PROGRESS);  
1217 - cast<T, float>(ptr, temp, header.bands);  
1218 - }  
1219 - else if (header.data_type == envi_header::float64){  
1220 - ((bsq<double>*)file)->spectrum((double*)temp, x, y, PROGRESS);  
1221 - cast<T, double>(ptr, temp, header.bands);  
1222 - }  
1223 - else{  
1224 - std::cout << "ERROR: unidentified data type" << std::endl;  
1225 - exit(1);  
1226 - }  
1227 - }  
1228 - else if (header.interleave == envi_header::BIL){  
1229 - if (header.data_type == envi_header::float32){  
1230 - ((bil<float>*)file)->spectrum((float*)temp, x, y, PROGRESS);  
1231 - cast<T, float>(ptr, temp, header.bands);  
1232 - }  
1233 - else if (header.data_type == envi_header::float64){  
1234 - ((bil<double>*)file)->spectrum((double*)temp, x, y, PROGRESS);  
1235 - cast<T, double>(ptr, temp, header.bands);  
1236 - }  
1237 - else{  
1238 - std::cout << "ERROR: unidentified data type" << std::endl;  
1239 - exit(1);  
1240 - }  
1241 - }  
1242 - else if (header.interleave == envi_header::BIP){  
1243 - if (header.data_type == envi_header::float32){  
1244 - ((bip<float>*)file)->spectrum((float*)temp, x, y, PROGRESS);  
1245 - cast<T, float>(ptr, temp, header.bands);  
1246 - }  
1247 - else if (header.data_type == envi_header::float64){  
1248 - ((bip<double>*)file)->spectrum((double*)temp, x, y, PROGRESS);  
1249 - cast<T, double>(ptr, temp, header.bands);  
1250 - }  
1251 - else{  
1252 - std::cout << "ERROR: unidentified data type" << std::endl;  
1253 - exit(1);  
1254 - }  
1255 - }*/  
1256 } 1239 }
1257 1240
1258 /// Retrieve a single band (based on index) and stores it in pre-allocated memory. 1241 /// Retrieve a single band (based on index) and stores it in pre-allocated memory.
@@ -1340,14 +1323,6 @@ public: @@ -1340,14 +1323,6 @@ public:
1340 if (header.interleave == envi_header::BSQ){ 1323 if (header.interleave == envi_header::BSQ){
1341 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl; 1324 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
1342 exit(1); 1325 exit(1);
1343 - /*if (header.data_type == envi_header::float32)  
1344 - return ((bsq<float>*)file)->co_matrix(co, avg, mask, PROGRESS);  
1345 - else if (header.data_type == envi_header::float64)  
1346 - return ((bsq<double>*)file)->co_matrix(co, avg, mask, PROGRESS);  
1347 - else{  
1348 - std::cout << "ERROR: unidentified data type" << std::endl;  
1349 - exit(1);  
1350 - }*/  
1351 } 1326 }
1352 else if (header.interleave == envi_header::BIL){ 1327 else if (header.interleave == envi_header::BIL){
1353 if (header.data_type == envi_header::float32) 1328 if (header.data_type == envi_header::float32)
@@ -1372,6 +1347,35 @@ public: @@ -1372,6 +1347,35 @@ public:
1372 return false; 1347 return false;
1373 } 1348 }
1374 1349
  1350 + /// Calculate the covariance of noise matrix for all masked pixels in the image.
  1351 +
  1352 + /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
  1353 + /// @param avg is a pointer to memory of size B that stores the average spectrum
  1354 + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
  1355 + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){
  1356 + if (header.interleave == envi_header::BSQ){
  1357 + std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
  1358 + exit(1);
  1359 + }
  1360 +
  1361 +
  1362 + else if (header.interleave == envi_header::BIL){
  1363 + std::cout<<"ERROR: calculating the covariance matrix of noise for a BIL file is impractical; convert to BIP first"<<std::endl;
  1364 + exit(1);
  1365 + }
  1366 +
  1367 + else if (header.interleave == envi_header::BIP){
  1368 + if (header.data_type == envi_header::float32)
  1369 + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1370 + else if (header.data_type == envi_header::float64)
  1371 + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1372 + else{
  1373 + std::cout << "ERROR: unidentified data type" << std::endl;
  1374 + exit(1);
  1375 + }
  1376 + }
  1377 + return false;
  1378 + }
1375 1379
1376 /// Crop a region of the image and save it to a new file. 1380 /// Crop a region of the image and save it to a new file.
1377 1381
@@ -1635,7 +1639,81 @@ public: @@ -1635,7 +1639,81 @@ public:
1635 } 1639 }
1636 exit(1); 1640 exit(1);
1637 } 1641 }
1638 -}; 1642 +
  1643 + void multiply(std::string outfile, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1644 + header.save(outfile + ".hdr");
  1645 + if (header.interleave == envi_header::BSQ){
  1646 + if (header.data_type == envi_header::float32)
  1647 + ((bsq<float>*)file)->multiply(outfile, v, mask, PROGRESS);
  1648 + else if (header.data_type == envi_header::float64)
  1649 + ((bsq<double>*)file)->multiply(outfile, v, mask, PROGRESS);
  1650 + else{
  1651 + std::cout << "ERROR: unidentified data type" << std::endl;
  1652 + exit(1);
  1653 + }
  1654 + }
  1655 +
  1656 + else if (header.interleave == envi_header::BIL){
  1657 + if (header.data_type == envi_header::float32)
  1658 + ((bil<float>*)file)->multiply(outfile, v, mask, PROGRESS);
  1659 + else if (header.data_type == envi_header::float64)
  1660 + ((bil<double>*)file)->multiply(outfile, v, mask, PROGRESS);
  1661 + else{
  1662 + std::cout << "ERROR: unidentified data type" << std::endl;
  1663 + exit(1);
  1664 + }
  1665 + }
  1666 +
  1667 + else if (header.interleave == envi_header::BIP){
  1668 + if (header.data_type == envi_header::float32)
  1669 + ((bip<float>*)file)->multiply(outfile, v, mask, PROGRESS);
  1670 + else if (header.data_type == envi_header::float64)
  1671 + ((bip<double>*)file)->multiply(outfile, v, mask, PROGRESS);
  1672 + else{
  1673 + std::cout << "ERROR: unidentified data type" << std::endl;
  1674 + exit(1);
  1675 + }
  1676 + }
  1677 + exit(1);
  1678 + }
  1679 +
  1680 + void add(std::string outfile, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1681 + header.save(outfile + ".hdr");
  1682 + if (header.interleave == envi_header::BSQ){
  1683 + if (header.data_type == envi_header::float32)
  1684 + ((bsq<float>*)file)->add(outfile, v, mask, PROGRESS);
  1685 + else if (header.data_type == envi_header::float64)
  1686 + ((bsq<double>*)file)->add(outfile, v, mask, PROGRESS);
  1687 + else{
  1688 + std::cout << "ERROR: unidentified data type" << std::endl;
  1689 + exit(1);
  1690 + }
  1691 + }
  1692 +
  1693 + else if (header.interleave == envi_header::BIL){
  1694 + if (header.data_type == envi_header::float32)
  1695 + ((bil<float>*)file)->add(outfile, v, mask, PROGRESS);
  1696 + else if (header.data_type == envi_header::float64)
  1697 + ((bil<double>*)file)->add(outfile, v, mask, PROGRESS);
  1698 + else{
  1699 + std::cout << "ERROR: unidentified data type" << std::endl;
  1700 + exit(1);
  1701 + }
  1702 + }
  1703 +
  1704 + else if (header.interleave == envi_header::BIP){
  1705 + if (header.data_type == envi_header::float32)
  1706 + ((bip<float>*)file)->add(outfile, v, mask, PROGRESS);
  1707 + else if (header.data_type == envi_header::float64)
  1708 + ((bip<double>*)file)->add(outfile, v, mask, PROGRESS);
  1709 + else{
  1710 + std::cout << "ERROR: unidentified data type" << std::endl;
  1711 + exit(1);
  1712 + }
  1713 + }
  1714 + exit(1);
  1715 + }
  1716 +}; //end ENVI
1639 1717
1640 } //end namespace rts 1718 } //end namespace rts
1641 1719
stim/envi/envi_header.h
@@ -440,9 +440,24 @@ struct envi_header @@ -440,9 +440,24 @@ struct envi_header
440 } 440 }
441 441
442 /// Convert a wavelength to a band index (or a pair of surrounding band indices) 442 /// Convert a wavelength to a band index (or a pair of surrounding band indices)
  443 + /// if the file doesn't specify wavelengths, w is assumed to be a band index
443 std::vector<size_t> band_index(double w){ 444 std::vector<size_t> band_index(double w){
444 std::vector<size_t> idx; //create an empty array of indices 445 std::vector<size_t> idx; //create an empty array of indices
445 - if(w < wavelength[0] || w > wavelength[bands-1]) return idx; //if the wavelength range is outside of the file, return an empty array 446 + if(wavelength.size() == 0){ //if a wavelength vector doesn't exist, assume the passed value is a band
  447 + if(w < 0 || w > bands-1) return idx; //if the band is outside the given band range, return an empty vector
  448 + size_t low, high; //allocate space for the floor and ceiling
  449 + low = (size_t)std::floor(w); //calculate the floor
  450 + high = (size_t)std::ceil(w); //calculate the ceiling
  451 + if(low == high) //if the floor and ceiling are the same
  452 + idx.push_back(low); //return a vector with one element (the given w matches a band exactly)
  453 + else{
  454 + idx.resize(2); //otherwise return the floor and ceiling
  455 + idx[0] = low;
  456 + idx[1] = high;
  457 + }
  458 + return idx;
  459 + }
  460 + else if(w < wavelength[0] || w > wavelength[bands-1]) return idx; //if the wavelength range is outside of the file, return an empty array
446 461
447 for(size_t b = 0; b < bands; b++){ //for each band in the wavelength vector 462 for(size_t b = 0; b < bands; b++){ //for each band in the wavelength vector
448 if(wavelength[b] == w){ //if an exact match is found 463 if(wavelength[b] == w){ //if an exact match is found
@@ -149,13 +149,13 @@ public: @@ -149,13 +149,13 @@ public:
149 for(size_t i = 0; i < R[0] * R[1]; i++){ //for each pixel in that page 149 for(size_t i = 0; i < R[0] * R[1]; i++){ //for each pixel in that page
150 150
151 #ifdef _WIN32 151 #ifdef _WIN32
152 - if(!_finite(page[i])){ //if the value at index i is finite 152 + if(!_finite(page[i])){ //if the value at index i is not finite
153 #else 153 #else
154 - if(!std::isfinite(page[i])){ //C++11 implementation 154 + if(!std::isfinite(page[i])){ //C++11 implementation
155 #endif 155 #endif
156 size_t x, y, b; 156 size_t x, y, b;
157 - xyb(p * R[0] * R[1] + i, x, y, b); //find the 3D coordinates of the value  
158 - mask[ y * X() + x ] = 0; //mask the pixel (it's not bad) 157 + xyb(p * R[0] * R[1] + i, x, y, b); //find the 3D coordinates of the value
  158 + mask[ y * X() + x ] = 0; //remove the pixel (it's bad)
159 } 159 }
160 } 160 }
161 if(PROGRESS) progress = (double)(p + 1) / (double)R[2] * 100; 161 if(PROGRESS) progress = (double)(p + 1) / (double)R[2] * 100;
@@ -202,6 +202,24 @@ public: @@ -202,6 +202,24 @@ public:
202 } 202 }
203 } 203 }
204 204
  205 + void read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
  206 + size_t d[3]; //position in the binary coordinate system
  207 + size_t sd[3]; //size in the binary coordinate system
  208 +
  209 + d[O[0]] = x; //set the point in the binary coordinate system
  210 + d[O[1]] = y;
  211 + d[O[2]] = z;
  212 +
  213 + sd[O[0]] = sx; //set the size in the binary coordinate system
  214 + sd[O[1]] = sy;
  215 + sd[O[2]] = sz;
  216 +
  217 + if(!binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){
  218 + std::cout<<"error reading block in stim::hsi: ("<<d[0]<<", "<<d[1]<<", "<<d[2]<<") - ["<<sd[0]<<", "<<sd[1]<<", "<<sd[2]<<"]"<<std::endl;
  219 + exit(1);
  220 + }
  221 + }
  222 +
205 }; 223 };
206 224
207 } //end namespace STIM 225 } //end namespace STIM
stim/gl/gl_texture.h
1 #ifndef STIM_GL_TEXTURE_H 1 #ifndef STIM_GL_TEXTURE_H
2 #define STIM_GL_TEXTURE_H 2 #define STIM_GL_TEXTURE_H
3 3
4 -  
5 -  
6 -  
7 -/*  
8 -includes not necessary (yet)  
9 -  
10 -#include <iterator>  
11 -#include <algorithm>  
12 -  
13 -  
14 -*/  
15 -  
16 #include <math.h> 4 #include <math.h>
17 #include <iostream> 5 #include <iostream>
18 #include <vector> 6 #include <vector>
19 #include "../grids/image_stack.h" 7 #include "../grids/image_stack.h"
20 -#include <GL/glut.h>  
21 -//#include <GL/glext.h>  
22 -#include "./error.h" 8 +//Visual Studio requires GLEW
  9 +#ifdef _WIN32
  10 + #include <GL/glew.h>
  11 +#endif
  12 +//#include <GL/glut.h>
  13 +#include <stim/gl/error.h>
23 namespace stim{ 14 namespace stim{
24 15
25 /* 16 /*
@@ -27,195 +18,282 @@ class gl_texture @@ -27,195 +18,282 @@ class gl_texture
27 Uses image_stack class in order to create a texture object. 18 Uses image_stack class in order to create a texture object.
28 */ 19 */
29 20
30 -template<typename T>  
31 -class gl_texture : public virtual image_stack<T> 21 +template<typename T, typename F = float>
  22 +class gl_texture : public virtual image_stack<T, F>
32 { 23 {
33 - private:  
34 - /// Sets the internal texture_type, based on the data  
35 - /// size. Either 3D, 2D, 1D textures.  
36 -  
37 - void  
38 - setTextureType()  
39 - {  
40 - if (R[3] > 1)  
41 - texture_type = GL_TEXTURE_3D;  
42 - else if (R[3] == 1 && R[2] == 0)  
43 - texture_type = GL_TEXTURE_1D;  
44 - else if (R[3] == 1)  
45 - texture_type = GL_TEXTURE_2D;  
46 - }  
47 protected: 24 protected:
48 - std::string path; 25 + //std::string path;
49 GLuint texID; //OpenGL object 26 GLuint texID; //OpenGL object
50 GLenum texture_type; //1D, 2D, 3D 27 GLenum texture_type; //1D, 2D, 3D
51 - GLint interpType;  
52 - GLint texWrap;  
53 - GLenum type;  
54 - GLenum format; 28 + GLint interpolation;
  29 + GLint wrap;
  30 + GLenum cpu_type;
  31 + GLenum gpu_type;
  32 + GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.)
55 using image_stack<T>::R; 33 using image_stack<T>::R;
56 - using image_stack<T>::S; 34 + //using image_stack<T>::S;
57 using image_stack<T>::ptr; 35 using image_stack<T>::ptr;
58 - using image_stack<T>::samples; 36 +
  37 + /// Sets the internal texture_type, based on the data dimensions
  38 + void setTextureType(){
  39 + if (R[3] > 1) //if the third dimension is greater than 1
  40 + texture_type = GL_TEXTURE_3D; //this is a 3D texture
  41 + else if (R[2] > 1) //if the second dimension is greater than 1
  42 + texture_type = GL_TEXTURE_2D; //this is a 2D texture
  43 + else if (R[1] > 1) //if the dimension value is greater than 1
  44 + texture_type = GL_TEXTURE_1D; //this is a 1D texture
  45 + }
  46 +
  47 + //initializes important variables
  48 + void init() {
  49 + texID = 0; //initialize texture ID to zero, default if OpenGL returns an error
  50 + //memset(R, 0, sizeof(size_t));
  51 + //memset(grid<T, 4, F>::S, 0, sizeof(F));
  52 + }
  53 +
  54 + //guesses the color format of the texture
  55 + GLenum guess_format(){
  56 + size_t channels = R[0];
  57 + switch(channels){
  58 + case 1:
  59 + return GL_LUMINANCE;
  60 + case 2:
  61 + return GL_RG;
  62 + case 3:
  63 + return GL_RGB;
  64 + case 4:
  65 + return GL_RGBA;
  66 + default:
  67 + std::cout<<"Error in stim::gl_texture - unable to guess texture format based on number of channels ("<<R[4]<<")"<<std::endl;
  68 + exit(1);
  69 + }
  70 + }
  71 +
  72 + //guesses the OpenGL CPU data type based on T
  73 + GLenum guess_cpu_type(){
  74 + // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
  75 +
  76 + //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());
  77 + //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());
  78 + //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());
  79 + //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());
  80 + //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());
  81 + //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());
  82 + //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C());
  83 +
  84 + if(typeid(T) == typeid(unsigned char)) return GL_UNSIGNED_BYTE;
  85 + if(typeid(T) == typeid(char)) return GL_BYTE;
  86 + if(typeid(T) == typeid(unsigned short)) return GL_UNSIGNED_SHORT;
  87 + if(typeid(T) == typeid(short)) return GL_SHORT;
  88 + if(typeid(T) == typeid(unsigned int)) return GL_UNSIGNED_INT;
  89 + if(typeid(T) == typeid(int)) return GL_INT;
  90 + if(typeid(T) == typeid(float)) return GL_FLOAT;
  91 +
  92 + std::cout<<"ERROR in stim::gl_texture - no valid data type found"<<std::endl;
  93 + exit(1);
  94 + }
  95 +
  96 + //Guesses the "internal format" of the texture to closely approximate the original format
  97 + GLint guess_gpu_type(){
  98 + switch(format){
  99 + case GL_LUMINANCE:
  100 + switch(cpu_type){
  101 + case GL_BYTE:
  102 + case GL_UNSIGNED_BYTE:
  103 + return GL_LUMINANCE8;
  104 + case GL_SHORT:
  105 + case GL_UNSIGNED_SHORT:
  106 + return GL_LUMINANCE16;
  107 + case GL_INT:
  108 + case GL_UNSIGNED_INT:
  109 + return GL_LUMINANCE32I_EXT;
  110 + case GL_FLOAT:
  111 + return GL_LUMINANCE32F_ARB;
  112 + default:
  113 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  114 + exit(1);
  115 + }
  116 + case GL_RGB:
  117 + switch(cpu_type){
  118 + case GL_BYTE:
  119 + case GL_UNSIGNED_BYTE:
  120 + return GL_RGB8;
  121 + case GL_SHORT:
  122 + case GL_UNSIGNED_SHORT:
  123 + return GL_RGB16;
  124 + case GL_INT:
  125 + case GL_UNSIGNED_INT:
  126 + return GL_RGB32I;
  127 + case GL_FLOAT:
  128 + return GL_RGB32F;
  129 + default:
  130 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  131 + exit(1);
  132 + }
  133 + case GL_RGBA:
  134 + switch(cpu_type){
  135 + case GL_BYTE:
  136 + case GL_UNSIGNED_BYTE:
  137 + return GL_RGBA8;
  138 + case GL_SHORT:
  139 + case GL_UNSIGNED_SHORT:
  140 + return GL_RGBA16;
  141 + case GL_INT:
  142 + case GL_UNSIGNED_INT:
  143 + return GL_RGBA32I;
  144 + case GL_FLOAT:
  145 + return GL_RGBA32F;
  146 + default:
  147 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  148 + exit(1);
  149 + }
  150 + default:
  151 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  152 + exit(1);
  153 + }
  154 + }
  155 + /// creates this texture in the current OpenGL context
  156 + void generate_texture(){
  157 + glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
  158 + CHECK_OPENGL_ERROR
  159 + glGenTextures(1, &texID);
  160 + CHECK_OPENGL_ERROR
  161 + glBindTexture(texture_type, texID);
  162 + CHECK_OPENGL_ERROR
  163 + glTexParameteri(texture_type, GL_TEXTURE_MIN_FILTER, interpolation);
  164 + CHECK_OPENGL_ERROR
  165 + glTexParameteri(texture_type, GL_TEXTURE_MAG_FILTER, interpolation);
  166 + CHECK_OPENGL_ERROR
  167 + switch(texture_type){
  168 + case GL_TEXTURE_3D:
  169 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, wrap);
  170 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_T, wrap);
  171 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_R, wrap);
  172 + glTexImage3D(texture_type, 0, gpu_type, (GLsizei)R[1], (GLsizei)R[2], (GLsizei)R[3], 0, format, cpu_type, ptr);
  173 + break;
  174 + case GL_TEXTURE_2D:
  175 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, wrap);
  176 + CHECK_OPENGL_ERROR
  177 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_T, wrap);
  178 + CHECK_OPENGL_ERROR
  179 + glTexImage2D(texture_type, 0, gpu_type, (GLsizei)R[1], (GLsizei)R[2], 0, format, cpu_type, ptr);
  180 + CHECK_OPENGL_ERROR
  181 + break;
  182 + case GL_TEXTURE_1D:
  183 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, wrap);
  184 + CHECK_OPENGL_ERROR
  185 + glTexImage1D(texture_type, 0, gpu_type, (GLsizei)R[1], 0, format, cpu_type, ptr);
  186 + CHECK_OPENGL_ERROR
  187 + break;
  188 + default:
  189 + std::cout<<"Error in stim::gl_texture - unrecognized texture target when generating texture"<<std::endl;
  190 + exit(1);
  191 + break;
  192 + }
  193 + CHECK_OPENGL_ERROR
  194 + }
  195 + void guess_parameters(){
  196 + setTextureType(); //set the texture type: 1D, 2D, 3D
  197 + format = guess_format(); //guess the texture format based on the number of image channels
  198 + cpu_type = guess_cpu_type(); //guess the CPU type based on the template
  199 + gpu_type = guess_gpu_type(); //guess the GPU type based on the format and template
  200 + }
59 201
60 public: 202 public:
61 203
62 ///default constructor 204 ///default constructor
63 - gl_texture()  
64 - {  
65 - 205 + gl_texture( GLint interp = GL_LINEAR, //default to linear interpolation
  206 + GLint twrap = GL_REPEAT) //default repeating the texture at the edges
  207 + : image_stack<T>() {
  208 + init(); //initialize the texture with NULL values
  209 + interpolation = interp; //store the interpolation type
  210 + wrap = twrap; //store the wrap type
66 } 211 }
67 212
68 - ///@param string path to the directory with the image files.  
69 - ///Creates an instance of the gl_texture object with a path to the data. 213 + ///@param is a mask indicating the files to load
  214 + ///Creates an instance of the gl_texture object and initializes it with a file list
  215 +
  216 + gl_texture(std::string file_mask, GLint interp = GL_LINEAR, GLint twrap = GL_REPEAT){
  217 + init();
  218 + interpolation = interp; //store interpolation type
  219 + wrap = twrap; //store wrap type
  220 + load_images(file_mask);
  221 + }
70 222
71 - gl_texture(std::string file_mask)  
72 - {  
73 - //path = file_mask;  
74 - image_stack<T>::load_images(file_mask);  
75 - setTextureType(); 223 + ///Creates an instance of gl_texture and initializes with a file list
  224 + ///@param file_list is a list of files
  225 + ///@param interp is the type of texture interpolation (GL_LINEAR, GL_NEAREST)
  226 + ///@param twrap is the type of texture wrapping
  227 + gl_texture(std::vector<std::string> file_list, GLint interp = GL_LINEAR, GLint twrap = GL_REPEAT){
  228 + init();
  229 + interpolation = interp;
  230 + wrap = twrap;
  231 + load_images(file_list);
  232 + }
  233 +
  234 + ///Attaches the texture to the current OpenGL context and makes it ready to render
  235 + void attach(){
  236 + if(texID == 0) generate_texture(); //generate the texture if it doesn't already exist
  237 + else{
  238 + std::cout<<"Texture has already been attached to a context."<<std::endl;
  239 + exit(1);
  240 + }
  241 + }
  242 +
  243 + //binds a texture to be the current render source
  244 + void bind(){
  245 + glBindTexture(texture_type, texID); //bind the texture to the appropriate texture target
  246 + CHECK_OPENGL_ERROR
76 } 247 }
77 248
78 ///returns the dimentions of the data in the x, y, z directions. 249 ///returns the dimentions of the data in the x, y, z directions.
79 - vec<int>  
80 - getSize()  
81 - { 250 + vec<int> getSize(){
82 stim::vec<int> size(R[1], R[2], R[3]); 251 stim::vec<int> size(R[1], R[2], R[3]);
83 return size; 252 return size;
84 } 253 }
85 254
86 - ///@param GLint interp --GL_LINEAR, GL_NEAREST...  
87 - ///@param GLint twrap --GL_REPEAR, GL_CLAMP_TO_EDGE...  
88 - ///@param GLenum dataType --GL_UNSIGNED_BYTE, GL_FLOAT16...  
89 - ///@param GLenum dataFormat--GL_LUMINANCE, GL_RGB...  
90 - /// Texture paramenters.  
91 - void  
92 - setTexParam(GLint interp = GL_LINEAR,  
93 - GLint twrap = GL_CLAMP_TO_EDGE,  
94 - GLenum dataType = GL_UNSIGNED_BYTE,  
95 - GLenum dataFormat = GL_LUMINANCE)  
96 - {  
97 - interpType = interp;  
98 - texWrap = twrap;  
99 - type = dataType;  
100 - format = dataFormat; 255 + void getSize(size_t& x, size_t& y, size_t& z) {
  256 + x = R[0]; y = R[1]; z = R[2];
101 } 257 }
102 258
103 ///@param x size of the voxel in x direction 259 ///@param x size of the voxel in x direction
104 ///@param y size of the voxel in y direction 260 ///@param y size of the voxel in y direction
105 ///@param z size of the voxel in z direction 261 ///@param z size of the voxel in z direction
106 /// Sets the dimenstions of the voxels. 262 /// Sets the dimenstions of the voxels.
107 - void  
108 - setDims(float x, float y, float z)  
109 - {  
110 - S[1] = x;  
111 - S[2] = y;  
112 - S[3] = z; 263 + void setSpacing(float sx, float sy, float sz){
  264 + grid<T, 4, F>::S[1] = sx;
  265 + grid<T, 4, F>::S[2] = sy;
  266 + grid<T, 4, F>::S[3] = sz;
113 } 267 }
114 268
115 ///Returns a stim::vec that contains the x, y, z sizes of the voxel. 269 ///Returns a stim::vec that contains the x, y, z sizes of the voxel.
116 - vec<float>  
117 - getDims()  
118 - {  
119 - vec<float> dims(S[1], S[2], S[3]); 270 + vec<float> getDims(){
  271 + vec<float> dims(grid<T, 4, F>::S[1], grid<T, 4, F>::S[2], grid<T, 4, F>::S[3]);
120 return dims; 272 return dims;
121 - } 273 + }
122 274
123 - ///@param file_Path location of the directory with the files  
124 - /// Sets the path and calls the loader on that path.  
125 - void  
126 - setPath(std::string file_path)  
127 - {  
128 - path = file_path;  
129 - image_stack<T>::load_images(path.append("/*.jpg"));  
130 - setTextureType(); 275 + /// Loads a series of files specified by a list of strings
  276 + /// @param file_list is the vector of file names as strings
  277 + void load_images(std::vector<std::string> file_list){
  278 + image_stack<T, F>::load_images(file_list); //load the images
  279 + guess_parameters();
131 } 280 }
132 -  
133 - /// Returns an std::string path associated with an instance of the gl_texture class.  
134 - std::string  
135 - getPath()  
136 - {  
137 - return path; 281 +
  282 + ///@param file_mask specifies the file(s) to be loaded
  283 + /// Sets the path and calls the loader on that path.
  284 + void load_images(std::string file_mask){
  285 + image_stack<T>::load_images(file_mask); //load images
  286 + guess_parameters();
138 } 287 }
139 288
140 /// Returns the GLuint id of the texture created by/associated with the 289 /// Returns the GLuint id of the texture created by/associated with the
141 - /// instance of the gl_texture class.  
142 -  
143 - GLuint  
144 - getTexture()  
145 - { 290 + /// instance of the gl_texture class.
  291 + GLuint getTexture(){
146 return texID; 292 return texID;
147 } 293 }
148 294
149 - /// Creates a texture and from the loaded data and  
150 - /// assigns that texture to texID  
151 - //TO DO :::: 1D textures  
152 - //TO DO:::add methods for handling the cases of T  
153 - // and convert them to GL equivalent.  
154 - // i.e. an overloaded function that handles paramenter conversion.  
155 - void  
156 - createTexture()  
157 - {  
158 - glPixelStorei(GL_UNPACK_ALIGNMENT,1);  
159 - glGenTextures(1, &texID);  
160 - glBindTexture(texture_type, texID);  
161 - glTexParameteri(texture_type,  
162 - GL_TEXTURE_MIN_FILTER,  
163 - interpType);  
164 - glTexParameteri(texture_type,  
165 - GL_TEXTURE_MAG_FILTER,  
166 - interpType);  
167 - switch(texture_type)  
168 - {  
169 - case GL_TEXTURE_3D:  
170 - glTexParameteri(texture_type,  
171 - GL_TEXTURE_WRAP_S,texWrap);  
172 - // GL_REPEAT);  
173 - // GL_CLAMP_TO_EDGE);  
174 - glTexParameteri(texture_type,  
175 - GL_TEXTURE_WRAP_T,texWrap);  
176 - // GL_REPEAT);  
177 - // GL_CLAMP_TO_EDGE);  
178 - glTexParameteri(texture_type,  
179 - GL_TEXTURE_WRAP_R,texWrap);  
180 - // GL_REPEAT);  
181 - // GL_CLAMP_TO_EDGE);  
182 - glTexImage3D(texture_type,  
183 - 0,  
184 - // GL_RGB16,  
185 - 1,  
186 - R[1],  
187 - R[2],  
188 - R[3],  
189 - 0,  
190 - format,  
191 - type,  
192 - ptr);  
193 - //GL_UNSIGNED_BYTE can be TYPES, convert to GL equivalents  
194 - glPixelStorei(GL_PACK_ALIGNMENT,1);  
195 - break;  
196 - case GL_TEXTURE_2D:  
197 - glTexParameteri(texture_type,  
198 - GL_TEXTURE_WRAP_S, texWrap);  
199 - glTexParameteri(texture_type,  
200 - GL_TEXTURE_WRAP_T, texWrap);  
201 - glTexImage2D(texture_type,  
202 - 0,  
203 - 1,  
204 - R[1],  
205 - R[2],  
206 - 0,  
207 - format,  
208 - type,  
209 - ptr);  
210 - break;  
211 - }  
212 - }  
213 - ///Temporary methods for debugging and testing are below.  
214 - ///Self-explanatory.  
215 295
216 - T*  
217 - getData()  
218 - { 296 + T* getData(){
219 return ptr; 297 return ptr;
220 } 298 }
221 299
@@ -15,78 +15,119 @@ namespace stim{ @@ -15,78 +15,119 @@ namespace stim{
15 Functions are provided for saving and loading binary data. 15 Functions are provided for saving and loading binary data.
16 16
17 **/ 17 **/
18 -template<typename T, unsigned int D = 1> 18 +template<typename T, unsigned int D = 1, typename F = float>
19 class grid{ 19 class grid{
20 20
21 protected: 21 protected:
22 22
23 - stim::vec<unsigned long> R; //elements in each dimension  
24 - stim::vec<float> S; 23 + size_t R[D]; //elements in each dimension
  24 + F S[D]; //spacing between element samples
25 T* ptr; //pointer to the data (on the GPU or CPU) 25 T* ptr; //pointer to the data (on the GPU or CPU)
26 26
27 - ///Return the total number of values in the binary file  
28 - unsigned long samples(){  
29 -  
30 - unsigned long s = 1;  
31 - for(unsigned int d = 0; d < D; d++)  
32 - s *= R[d];  
33 -  
34 - return s;  
35 - } 27 +
36 28
37 ///Initializes a grid by allocating the necessary memory and setting all values to zero 29 ///Initializes a grid by allocating the necessary memory and setting all values to zero
38 - void init(){  
39 -  
40 - //calculate the total number of values  
41 - unsigned long S = samples();  
42 -  
43 - //allocate memory to store the grid  
44 - ptr = (T*)malloc(sizeof(T) * S);  
45 -  
46 - //initialize the memory to zero  
47 - memset(ptr, 0, sizeof(T) * S); 30 + void init(){
  31 + ptr = NULL; //initialize the data pointer to NULL
  32 + memset(R, 0, sizeof(size_t) * D); //set the resolution to zero
  33 + for(size_t d = 0; d < D; d++) S[d] = (F)1.0; //initialize the spacing to unity
  34 + }
48 35
  36 + void alloc(){
  37 + if(ptr != NULL) free(ptr); //if memory has already been allocated, free it
  38 + size_t N = samples(); //calculate the total number of values
  39 + ptr = (T*)calloc(sizeof(T), N); //allocate memory to store the grid
49 } 40 }
50 41
51 public: 42 public:
52 43
53 ///Default constructor doesn't do anything 44 ///Default constructor doesn't do anything
54 grid(){ 45 grid(){
55 - ptr = NULL; //set the pointer to NULL so that we know nothing is allocated 46 + init();
56 } 47 }
57 48
58 ///Constructor used to specify the grid size as a vector 49 ///Constructor used to specify the grid size as a vector
59 50
60 /// @param _R is a vector describing the grid resolution 51 /// @param _R is a vector describing the grid resolution
61 - grid( stim::vec<unsigned long> _R){  
62 -  
63 - //set the grid resolution  
64 - R = _R;  
65 - 52 + grid( stim::vec<size_t> _R){
  53 + for (size_t d = 0; d < D; d++)
  54 + R[d] = _R[d];
66 init(); 55 init();
67 } 56 }
68 57
  58 + ///Return the total number of values in the binary file
  59 + size_t samples(){
  60 + size_t s = 1;
  61 + for(size_t d = 0; d < D; d++)
  62 + s *= R[d];
  63 + return s;
  64 + }
  65 +
  66 + ///Return the number of bytes in the binary file
  67 + size_t bytes(){
  68 + return samples() * sizeof(T);
  69 + }
  70 +
69 void 71 void
70 - setDim(stim::vec<float> s)  
71 - {  
72 - S = s; 72 + setDim(stim::vec<float> s){
  73 + for(size_t d = 0; d < D; d++)
  74 + S[d] = s[d];
73 } 75 }
74 76
75 ///Constructor used to specify the grid size as a set of parameters 77 ///Constructor used to specify the grid size as a set of parameters
76 -  
77 /// @param X0... is a list of values describing the grid size along each dimension 78 /// @param X0... is a list of values describing the grid size along each dimension
78 - grid( unsigned long X0, ...){ 79 + /*grid( size_t X0, ...){
  80 + R[0] = X0; //set the grid size of the first dimension
  81 + va_list ap; //get a variable list
  82 + va_start(ap, X0); //start the variable list at the first element
  83 + for(size_t d = 1; d<D; d++) //for each additional element
  84 + R[d] = va_arg(ap, size_t); //read the value from the variable list as a size_t
  85 + va_end(ap);
  86 + init(); //initialize the grid
  87 + }*/
  88 +
  89 + ///Set the spacing between grid sample points
  90 + /// @param X0... is a list of values describing the grid sample spacing
  91 + /*void spacing(F X0, ...) {
  92 + S[0] = X0; //set the sample spacing of the first dimension
  93 + va_list ap; //get a variable list
  94 + va_start(ap, X0); //start the variable list at the first element
  95 + for (size_t d = 1; d<D; d++) //for each additional element
  96 + S[d] = va_arg(ap, F); //read the value from the variable list as type F
  97 + va_end(ap);
  98 + }*/
  99 +
  100 + /// Set the spacing between grid sample points for the specified dimension
  101 + void spacing(size_t d, F sp){
  102 + if(d < D) S[d] = sp;
  103 + else{
  104 + std::cout<<"error in stim::grid::spacing() - insufficient dimensions"<<std::endl;
  105 + exit(1);
  106 + }
  107 + }
79 108
80 - R[0] = X0; 109 + /// Return the spacing for a given dimension
  110 + F spacing(size_t d){
  111 + if(d < D) return S[d];
  112 + else{
  113 + std::cout<<"error in stim::grid::spacing() - insufficient dimensions"<<std::endl;
  114 + exit(1);
  115 + }
  116 + }
81 117
82 - va_list ap;  
83 - va_start(ap, X0);  
84 - for(unsigned int d = 1; d<D; d++)  
85 - R[d] = va_arg(ap, unsigned long);  
86 - va_end(ap); 118 + /// Get the sample spacing for the given dimension
  119 + F get_spacing(size_t d) {
  120 + return S[d];
  121 + }
87 122
88 - init(); 123 + /// Get the size of the grid along the specified dimension
  124 + F size(size_t d){
  125 + return (F)R[d] * S[d];
  126 + }
89 127
  128 + /// Return the number of samples along the specified dimension
  129 + size_t samples(size_t d){
  130 + return R[d];
90 } 131 }
91 132
92 ///Writes the binary data to disk 133 ///Writes the binary data to disk
@@ -94,13 +135,9 @@ public: @@ -94,13 +135,9 @@ public:
94 /// @param filename is the name of the binary file to be written 135 /// @param filename is the name of the binary file to be written
95 void write(std::string filename){ 136 void write(std::string filename){
96 137
97 - std::fstream file;  
98 -  
99 - //open the file as binary for reading  
100 - file.open(filename.c_str(), std::ios::out | std::ios::binary);  
101 -  
102 - //write file to disk  
103 - file.write((char *)ptr, samples() * sizeof(T)); 138 + std::fstream file;
  139 + file.open(filename.c_str(), std::ios::out | std::ios::binary); //open the file as binary for writing
  140 + file.write((char *)ptr, samples() * sizeof(T)); //write file to disk
104 } 141 }
105 142
106 ///Loads a binary file from disk 143 ///Loads a binary file from disk
@@ -108,66 +145,52 @@ public: @@ -108,66 +145,52 @@ public:
108 /// @param filename is the name of the file containing the binary data 145 /// @param filename is the name of the file containing the binary data
109 /// @param S is the size of the binary file along each dimension 146 /// @param S is the size of the binary file along each dimension
110 /// @param header is the size of the header in bytes 147 /// @param header is the size of the header in bytes
111 - void read(std::string filename, stim::vec<unsigned long> S, unsigned long header = 0){  
112 -  
113 - R = S; //set the sample resolution  
114 -  
115 - //allocate space for the data  
116 - init();  
117 -  
118 - std::fstream file;  
119 -  
120 - //open the file as binary for writing  
121 - file.open(filename.c_str(), std::ios::in | std::ios::binary);  
122 -  
123 - //seek past the header  
124 - file.seekg(header, std::ios::beg);  
125 -  
126 -  
127 - //read the data  
128 - file.read((char *)ptr, samples() * sizeof(T)); 148 + void read(std::string filename, stim::vec<size_t> X, unsigned long header = 0){
  149 + for(size_t d = 0; d < D; d++)
  150 + R[d] = X[d]; //set the sample resolution
  151 + init(); //NOTE: init() resets ptr to NULL and R to zero; it does not allocate (alloc() does)
  152 + std::fstream file;
  153 + file.open(filename.c_str(), std::ios::in | std::ios::binary); //open the file as binary for reading
  154 + file.seekg(header, std::ios::beg); //seek past the header
  155 + file.read((char *)ptr, samples() * sizeof(T)); //read the data
129 } 156 }
130 157
131 ///Gets a single value from the grid given a set of coordinates 158 ///Gets a single value from the grid given a set of coordinates
132 -  
133 /// @param x0... is a list of coordinates specifying the desired value 159 /// @param x0... is a list of coordinates specifying the desired value
134 - T get(unsigned long x0, ...){ 160 + /*T get(unsigned long x0, ...){
135 161
136 - va_list ap; 162 + va_list ap; //create a variable list
137 163
138 - unsigned long F = 1;  
139 - unsigned long p = x0; 164 + unsigned long F = 1; //initialize the dimension size to 1
  165 + unsigned long idx = x0;
140 166
141 - va_start(ap, x0);  
142 - for(unsigned int d = 1; d<D; d++){  
143 - F *= R[d-1];  
144 - p += va_arg(ap, unsigned int) * F; 167 + va_start(ap, x0); //start a variable list
  168 + for(unsigned int d = 1; d<D; d++){ //for each dimension
  169 + F *= R[d-1]; //get the size of the first dimension
  170 + idx += va_arg(ap, unsigned int) * F; //increment the index
145 } 171 }
146 va_end(ap); 172 va_end(ap);
147 173
148 - return ptr[p];  
149 - } 174 + return ptr[idx]; //access the appropriate element and return the value
  175 + }*/
150 176
151 ///Sets a value in the grid 177 ///Sets a value in the grid
152 178
153 /// @param value is the grid point value 179 /// @param value is the grid point value
154 /// @x0... is the coordinate of the value to be set 180 /// @x0... is the coordinate of the value to be set
155 - void set(T value, unsigned long x0, ...){  
156 -  
157 - va_list ap;  
158 -  
159 - unsigned long F = 1;  
160 - unsigned long p = x0;  
161 -  
162 - va_start(ap, x0);  
163 - for(unsigned int d = 1; d<D; d++){  
164 - F *= R[d-1];  
165 - p += va_arg(ap, unsigned int) * F; 181 + /*void set(T value, unsigned long x0, ...){
  182 + va_list ap; //create a variable list
  183 + unsigned long F = 1; //initialize the dimension counter to 1
  184 + unsigned long idx = x0; //initialize the index to the first variable
  185 +
  186 + va_start(ap, x0); //start the variable list
  187 + for(unsigned int d = 1; d<D; d++){ //for each dimension
  188 + F *= R[d - 1];
  189 + idx += va_arg(ap, unsigned int) * F; //update the index
166 } 190 }
167 va_end(ap); 191 va_end(ap);
168 -  
169 - ptr[p] = value;  
170 - } 192 + ptr[idx] = value; //set the value at the indexed location
  193 + }*/
171 194
172 195
173 ///Outputs grid data as a string 196 ///Outputs grid data as a string
@@ -179,13 +202,11 @@ public: @@ -179,13 +202,11 @@ public:
179 for(unsigned int d = 0; d<D; d++){ 202 for(unsigned int d = 0; d<D; d++){
180 if(d!=0) result<<", "; 203 if(d!=0) result<<", ";
181 result<<R[d]; 204 result<<R[d];
182 -  
183 } 205 }
184 -  
185 result<<"]"<<std::endl; 206 result<<"]"<<std::endl;
186 207
187 //calculate the number of values to output 208 //calculate the number of values to output
188 - unsigned long nV = min((unsigned long long)R[0], (unsigned long long)10); 209 + unsigned long nV = std::min((unsigned long long)R[0], (unsigned long long)10);
189 210
190 for(unsigned long v = 0; v<nV; v++){ 211 for(unsigned long v = 0; v<nV; v++){
191 result<<ptr[v]; 212 result<<ptr[v];
stim/grids/image_stack.h
@@ -8,83 +8,112 @@ @@ -8,83 +8,112 @@
8 8
9 namespace stim{ 9 namespace stim{
10 10
11 -/**This class is used to load 3D grid data from stacks of images  
12 - The class uses a 4D grid object, where the first dimension is a color value.  
13 -**/  
14 -template<typename T>  
15 -class image_stack : public virtual stim::grid<T, 4>{ 11 +///This class is used to load 3D grid data from stacks of images
  12 +// The class uses a 4D grid object, where the first dimension is a color value.
  13 +template<typename T, typename F = float>
  14 +class image_stack : public virtual stim::grid<T, 4, F>{
16 15
17 enum image_type {stimAuto, stimMono, stimRGB, stimRGBA}; 16 enum image_type {stimAuto, stimMono, stimRGB, stimRGBA};
18 17
19 protected: 18 protected:
20 - using stim::grid<T, 4>::S; 19 + //using stim::grid<T, 4>::S;
21 using stim::grid<T, 4>::R; 20 using stim::grid<T, 4>::R;
22 using stim::grid<T, 4>::ptr; 21 using stim::grid<T, 4>::ptr;
23 - using stim::grid<T, 4>::samples;  
24 using stim::grid<T, 4>::read; 22 using stim::grid<T, 4>::read;
25 23
26 public: 24 public:
  25 + //default constructor
  26 + image_stack() : grid<T, 4>() {
27 27
28 - ///Load an image stack based on a file mask. Images are loaded in alphanumeric order. 28 + }
29 29
30 - /// @param file_mask is the mask describing images to be loaded  
31 - void load_images(std::string file_mask){ 30 + /// Overloads grid::samples() to return the number of samples associated with a given spatial dimension
  31 + /// this is necessary because R[0] stores the color
  32 + size_t samples(size_t d){
  33 + return grid<T, 4, F>::samples(d + 1);
  34 + }
32 35
33 - stim::filename file_path(file_mask); 36 + size_t samples(){
  37 + return R[1] * R[2] * R[3]; //return the number of spatial samples
  38 + }
  39 +
  40 + /// Returns the number of color channels
  41 + size_t channels(){
  42 + return R[0];
  43 + }
  44 +
  45 + /// Overloads grid::size() to return the size of the grid associated with a given spatial dimension
  46 + F size(size_t d){
  47 + return grid<T, 4, F>::size(d + 1);
  48 + }
34 49
35 - //get the list of files  
36 - std::vector<stim::filename> file_list = file_path.get_list(); 50 + /// Sets the spacing between samples in the image stack
  51 + void spacing(F sx, F sy, F sz){
  52 + grid<T, 4, F>::S[1] = sx; //set the sample spacing for the appropriate spatial dimension
  53 + grid<T, 4, F>::S[2] = sy;
  54 + grid<T, 4, F>::S[3] = sz;
  55 + }
  56 +
  57 + F spacing(size_t d){
  58 + return grid<T, 4, F>::spacing(d + 1);
  59 + }
  60 +
  61 + /// Overloads the spacing parameter to set the size of the grid associated with a given spatial dimension
  62 + //void spacing(F sx, F sy = 1.0f, F sz = 1.0f){
  63 + // grid<T, 4, F>::spacing((F)1.0, sx, sy, sz);
  64 + //}
  65 +
  66 + /// Load all of the images specified by a list of strings
  67 + /// @param string_list is a list of file names specifying images
  68 + void load_images(std::vector<std::string> string_list){
37 69
38 //if there are no matching files, exit 70 //if there are no matching files, exit
39 - if(file_list.size() == 0){ 71 + if(string_list.size() == 0){
40 std::cout<<"STIM ERROR (image_stack): No matching files for loading a stack."<<std::endl; 72 std::cout<<"STIM ERROR (image_stack): No matching files for loading a stack."<<std::endl;
41 exit(1); 73 exit(1);
42 } 74 }
43 - //for(int i = 0; i < file_list.size(); i++)  
44 - // std::cout << file_list[i].str() << std::endl;  
45 75
46 - //load the first image and set all of the image_stack properties  
47 - stim::image<T> I(file_list[0].str()); 76 + stim::image<T> I(string_list[0]); //load the first image and set all of the image_stack properties
48 77
49 - //set the image resolution and number of channels  
50 - R.push(I.channels());  
51 - R.push(I.width());  
52 - R.push(I.height());  
53 - R.push(file_list.size()); 78 + R[0] = I.channels(); //set the number of color channels
  79 + R[1] = I.width(); //set the stack height and width based on the image size
  80 + R[2] = I.height();
  81 + R[3] = string_list.size(); //set the stack z-resolution based on the number of images
54 82
55 - //allocate storage space  
56 - ptr = (T*)malloc(sizeof(T) * samples()); 83 + ptr = (T*)malloc(grid<T, 4, F>::bytes()); //allocate storage space
57 84
58 //load and copy each image into the grid 85 //load and copy each image into the grid
59 - for(unsigned int i = 0; i<R[3]; i++){  
60 - //load the image  
61 - stim::image<T> I(file_list[i].str()); 86 + for(unsigned int i = 0; i<R[3]; i++){ //for each image in the list
  87 + stim::image<T> I(string_list[i]); //load the image
  88 + I.get_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ]); //retrieve the interlaced data from the image - store it in the grid
  89 + }
  90 + }
62 91
63 - //retrieve the interlaced data from the image - store it in the grid  
64 - I.get_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ]);  
65 - 92 + /// Load a stack of images based on a file mask. Images are loaded in alphanumeric order
  93 + /// @param file_mask is the mask describing the images to be loaded
  94 + void load_images(std::string file_mask){
  95 + stim::filename file_path(file_mask); //get the path for the images
  96 + std::vector<stim::filename> file_list = file_path.get_list(); //get the list of files
  97 + std::vector<std::string> string_list(file_list.size()); //allocate space for an array of strings
  98 + for(size_t f = 0; f < file_list.size(); f++){ //for each file name in the list
  99 + string_list[f] = file_list[f].str(); //convert the file name to a string
66 } 100 }
  101 + load_images(string_list); //load all of the images in the list
67 } 102 }
68 103
69 ///Inserts image I into slot i. 104 ///Inserts image I into slot i.
70 /// @param stim::image<T> I; image to insert. 105 /// @param stim::image<T> I; image to insert.
71 /// @int I, where to place the image. 106 /// @int I, where to place the image.
72 - void insert_image(stim::image<T> I, int i)  
73 - { 107 + void insert_image(stim::image<T> I, int i){
74 I.get_interleaved_rgb(&ptr[i *R[0] *R[1] *R[2] ]); 108 I.get_interleaved_rgb(&ptr[i *R[0] *R[1] *R[2] ]);
75 } 109 }
76 110
77 ///Saves a single page to an image file 111 ///Saves a single page to an image file
78 /// @param file_name is the name of the image file to be created 112 /// @param file_name is the name of the image file to be created
79 /// @param i is the page to be saved 113 /// @param i is the page to be saved
80 - void save_image(std::string file_name, unsigned int i){  
81 -  
82 - //create an image  
83 - stim::image<T> I;  
84 -  
85 - //retrieve the interlaced data from the image - store it in the grid  
86 - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]);  
87 - 114 + void save_image(std::string file_name, unsigned int i){
  115 + stim::image<T> I; //create an image
  116 + I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //copy page i of the grid into the image as interleaved data
88 I.save(file_name); 117 I.save(file_name);
89 } 118 }
90 119
@@ -96,10 +125,10 @@ public: @@ -96,10 +125,10 @@ public:
96 void 125 void
97 set_dim(float x, float y, float z) 126 set_dim(float x, float y, float z)
98 { 127 {
99 - S[0] = 1;  
100 - S[1] = x;  
101 - S[2] = y;  
102 - S[3] = z; 128 + grid<T, 4, F>::S[0] = 1;
  129 + grid<T, 4, F>::S[1] = x;
  130 + grid<T, 4, F>::S[2] = y;
  131 + grid<T, 4, F>::S[3] = z;
103 } 132 }
104 133
105 ///set dimensions of the grid. 134 ///set dimensions of the grid.
@@ -124,12 +153,6 @@ public: @@ -124,12 +153,6 @@ public:
124 153
125 stim::filename file_path(file_mask); 154 stim::filename file_path(file_mask);
126 155
127 - //if the file path is relative, update it with the current working directory  
128 -// if(file_path.is_relative()){  
129 -// stim::filename wd = stim::filename::cwd();  
130 -// file_path = wd.get_relative(file_mask);  
131 -// }  
132 -  
133 //create a list of file names 156 //create a list of file names
134 std::vector<std::string> file_list = stim::wildcards::increment(file_path.str(), 0, R[3]-1, 1); 157 std::vector<std::string> file_list = stim::wildcards::increment(file_path.str(), 0, R[3]-1, 1);
135 158
stim/image/image.h
@@ -159,7 +159,10 @@ public: @@ -159,7 +159,10 @@ public:
159 std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl; 159 std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
160 exit(1); 160 exit(1);
161 } 161 }
162 - allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image 162 + int cols = cvImage.cols;
  163 + int rows = cvImage.rows;
  164 + int channels = cvImage.channels();
  165 + allocate(cols, rows, channels); //allocate space for the image
163 unsigned char* cv_ptr = (unsigned char*)cvImage.data; 166 unsigned char* cv_ptr = (unsigned char*)cvImage.data;
164 if(C() == 1) //if this is a single-color image, just copy the data 167 if(C() == 1) //if this is a single-color image, just copy the data
165 memcpy(img, cv_ptr, bytes()); 168 memcpy(img, cv_ptr, bytes());
@@ -217,6 +217,7 @@ public: @@ -217,6 +217,7 @@ public:
217 return result; 217 return result;
218 } 218 }
219 219
  220 +//#ifndef __NVCC__
220 /// Outputs the vector as a string 221 /// Outputs the vector as a string
221 std::string str() const{ 222 std::string str() const{
222 std::stringstream ss; 223 std::stringstream ss;
@@ -234,6 +235,7 @@ public: @@ -234,6 +235,7 @@ public:
234 235
235 return ss.str(); 236 return ss.str();
236 } 237 }
  238 +//#endif
237 239
238 size_t size(){ return 3; } 240 size_t size(){ return 3; }
239 241
stim/parser/arguments.h
@@ -523,7 +523,11 @@ namespace stim{ @@ -523,7 +523,11 @@ namespace stim{
523 std::string arg(size_t a){ 523 std::string arg(size_t a){
524 return args[a]; 524 return args[a];
525 } 525 }
526 - 526 +
  527 + /// Returns an std::vector of argument strings
  528 + std::vector<std::string> arg_vector(){
  529 + return args;
  530 + }
527 ///Returns an object describing the argument 531 ///Returns an object describing the argument
528 532
529 /// @param _name is the name of the requested argument 533 /// @param _name is the name of the requested argument
stim/parser/filename.h
@@ -110,12 +110,17 @@ protected: @@ -110,12 +110,17 @@ protected:
110 unix_dir = unix_dir.substr(2, unix_dir.length()-2); //extract the directory structure 110 unix_dir = unix_dir.substr(2, unix_dir.length()-2); //extract the directory structure
111 } 111 }
112 112
113 - if(unix_dir.at(0) == '/'){ //if there is a leading slash  
114 - relative = false; //the path is not relative  
115 - unix_dir = unix_dir.substr(1, unix_dir.length() - 1); //remove the slash 113 + if(drive.size() != 0){
  114 + relative = false;
  115 + }
  116 + if(unix_dir.size() > 0){ //if there is a directory specified, remove surrounding slashes
  117 + if(unix_dir[0] == '/'){ //if there is a leading slash
  118 + relative = false; //the path is not relative
  119 + unix_dir = unix_dir.substr(1, unix_dir.length() - 1); //remove the slash
  120 + }
  121 + if(unix_dir[unix_dir.size()-1] == '/')
  122 + unix_dir = unix_dir.substr(0, unix_dir.length() - 1);
116 } 123 }
117 - if(unix_dir.at(unix_dir.size()-1) == '/')  
118 - unix_dir = unix_dir.substr(0, unix_dir.length() - 1);  
119 124
120 path = stim::parser::split(unix_dir, '/'); //split up the directory structure 125 path = stim::parser::split(unix_dir, '/'); //split up the directory structure
121 126
stim/visualization/camera.h
@@ -186,6 +186,7 @@ public: @@ -186,6 +186,7 @@ public:
186 d = vec3<float>(0, 0, 1); 186 d = vec3<float>(0, 0, 1);
187 up = vec3<float>(0, 1, 0); 187 up = vec3<float>(0, 1, 0);
188 focus = 1; 188 focus = 1;
  189 + fov = 60;
189 190
190 } 191 }
191 192
stim/visualization/colormap.h
@@ -4,13 +4,13 @@ @@ -4,13 +4,13 @@
4 #include <string> 4 #include <string>
5 #include <stdlib.h> 5 #include <stdlib.h>
6 #include <cmath> 6 #include <cmath>
7 -#include "cublas_v2.h"  
8 7
9 #ifdef _WIN32 8 #ifdef _WIN32
10 #include <float.h> 9 #include <float.h>
11 #endif 10 #endif
12 11
13 #ifdef __CUDACC__ 12 #ifdef __CUDACC__
  13 +#include "cublas_v2.h"
14 #include <stim/cuda/cudatools/error.h> 14 #include <stim/cuda/cudatools/error.h>
15 #endif 15 #endif
16 16