Commit af825cb9e3a875e096d138d147b325ab67c2beb0

Authored by Pavel Govyadinov
2 parents f7b84fb2 cf5b4c92

fixed merge conflicts

matlab/loadAgilent.m 0 → 100644
function S = loadAgilent(filename)
%LOADAGILENT Read an image cube from a binary file with a 1020-byte header.
%   S = loadAgilent(filename) reads three int16 header fields (bands at byte
%   offset 9, then samples and lines at byte offset 24) and returns the
%   float32 payload that starts at byte 1020, reshaped to
%   [samples, lines, bands].
%
%   An error is raised if the file cannot be opened. The file is closed on
%   every exit path, including when a read or the reshape fails.

fid = fopen(filename, 'r');                         %open the file for reading
if fid == -1
    error('loadAgilent:fileOpen', 'Unable to open file "%s".', filename);
end
closer = onCleanup(@() fclose(fid));                %close the file when the function exits

fseek(fid, 9, 'bof');                               %skip past the first 9 bytes of the header
bands = fread(fid, 1, 'int16');                     %read the number of bands in the file
fseek(fid, 13, 'cof');                              %skip the next 13 bytes in the header

samples = fread(fid, 1, 'int16');                   %read the number of samples (X)
lines = fread(fid, 1, 'int16');                     %read the number of lines (Y)

fseek(fid, 1020, 'bof');                            %skip past the entire header
S = fread(fid, [samples lines*bands], 'float32');   %read all the data
S = reshape(S, [samples, lines, bands]);
  16 +
  17 +
... ...
stim/biomodels/cellset.h 0 → 100644
  1 +#ifndef STIM_CELLSET_H
  2 +#define STIM_CELLSET_H
  3 +
#include <stim/math/vec3.h>
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <limits>
#include <new>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
  8 +
  9 +namespace stim{
  10 +
  11 +class cellset{
  12 +private:
  13 + static const char delim = ' ';
  14 +protected:
  15 + std::vector<double*> cells; //vector storing field data for each cell
  16 + std::unordered_map<std::string, size_t> fields; //unordered map storing field->index information for each field
  17 + size_t ip[3]; //hard code to position indices (for speed)
  18 +
  19 + void init(){
  20 +
  21 + }
  22 +public:
  23 + /// Constructor - create an empty cell set
  24 + cellset(){
  25 + init();
  26 + }
  27 +
  28 + /// Constructor - load a cellset from a file
  29 + cellset(std::string filename){
  30 + init(); //initialize an empty cellset
  31 + load(filename); //load the cellset from an existing file
  32 + }
  33 +
  34 + /// Loads a cellset from a file
  35 + void load(std::string filename){
  36 + std::ifstream infile(filename);
  37 + std::string header; //allocate space for the file header
  38 + std::getline(infile, header); //get the file header
  39 +
  40 + // break the header into fields
  41 + std::stringstream ss(header); //create a string stream
  42 + std::string field; //store a single field name
  43 + size_t i = 0; //current field index
  44 + while (std::getline(ss, field, delim)) { //split the header into individual fields
  45 + std::pair<std::string, size_t> p(field, i); //create a pair associating the header name with the index
  46 + fields.insert(p); //insert the pair into the fields map
  47 + i++; //increment the data index
  48 + }
  49 + size_t nfields = fields.size(); //store the number of fields for each cell
  50 +
  51 + //load each cell and all associated fields
  52 + std::string cell_line; //string holds all information for a cell
  53 + std::list<std::string> cell_list; //list will be temporary storage for the cell fields
  54 + while(std::getline(infile, cell_line)){ //for each cell entry
  55 + cell_list.push_back(cell_line); //push the cell entry into the list
  56 + }
  57 +
  58 + //convert the list into actual data
  59 + size_t ncells = cell_list.size(); //count the number of cells
  60 + cells.resize(ncells); //allocate enough space in the array to store all cells
  61 + for(size_t c = 0; c < ncells; c++){ //for each cell entry in the list
  62 + cells[c] = (double*) malloc(sizeof(double) * nfields); //allocate enough space for each field
  63 + std::stringstream fss(cell_list.front()); //turn the string representing the cell list into a stringstream
  64 + for(size_t f = 0; f < nfields; f++){ //for each field
  65 + fss>>cells[c][f]; //load the field
  66 + }
  67 + cell_list.pop_front(); //pop the read string off of the front of the list
  68 + }
  69 + infile.close(); //close the input file
  70 +
  71 + ip[0] = fields["x"]; //hard code the position indices for speed
  72 + ip[1] = fields["y"]; // this assumes all cells have positions
  73 + ip[2] = fields["z"];
  74 + }
  75 +
  76 + /// Return the value a specified field for a cell
  77 + /// @param c is the cell index
  78 + /// @param f is the field
  79 + double value(size_t c, std::string f){
  80 + size_t idx = fields[f];
  81 + return cells[c][idx];
  82 + }
  83 +
  84 + /// returns an ID used to look up a field
  85 + bool exists(std::string f){
  86 + std::unordered_map<std::string, size_t>::iterator iter = fields.find(f);
  87 + if(iter == fields.end()) return false;
  88 + else return true;
  89 + }
  90 +
  91 + /// Return the position of cell [i]
  92 + stim::vec3<double> p(size_t i){
  93 + stim::vec3<double> pos(cells[i][ip[0]], cells[i][ip[1]], cells[i][ip[2]]);
  94 + return pos;
  95 + }
  96 +
  97 + /// Return the number of cells in the set
  98 + size_t size(){
  99 + return cells.size();
  100 + }
  101 +
  102 + /// Return the maximum value of a field in this cell set
  103 + double max(std::string field){
  104 + size_t idx = fields[field]; //get the field index
  105 + size_t ncells = cells.size(); //get the total number of cells
  106 + double maxval, val; //stores the current and maximum values
  107 + for(size_t c = 0; c < ncells; c++){ //for each cell
  108 + val = cells[c][idx]; //get the field value for this cell
  109 + if(c == 0) maxval = val; //if this is the first cell, just assign the maximum
  110 + else if(val > maxval) maxval = val; // otherwise text for the size of val and assign it as appropriate
  111 + }
  112 + return maxval;
  113 + }
  114 +
  115 + /// Return the maximum value of a field in this cell set
  116 + double min(std::string field){
  117 + size_t idx = fields[field]; //get the field index
  118 + size_t ncells = cells.size(); //get the total number of cells
  119 + double minval, val; //stores the current and maximum values
  120 + for(size_t c = 0; c < ncells; c++){ //for each cell
  121 + val = cells[c][idx]; //get the field value for this cell
  122 + if(c == 0) minval = val; //if this is the first cell, just assign the maximum
  123 + else if(val < minval) minval = val; // otherwise text for the size of val and assign it as appropriate
  124 + }
  125 + return minval;
  126 + }
  127 +
  128 +
  129 +}; //end class cellset
  130 +}; //end namespace stim
  131 +
  132 +#endif
0 133 \ No newline at end of file
... ...
stim/cuda/ivote/local_max.cuh
... ... @@ -14,7 +14,7 @@ namespace stim{
14 14  
15 15 // calculate the 2D coordinates for this current thread.
16 16 int xi = blockIdx.x * blockDim.x + threadIdx.x;
17   - int yi = blockIdx.y;
  17 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
18 18  
19 19 if(xi >= x || yi >= y)
20 20 return;
... ... @@ -63,8 +63,10 @@ namespace stim{
63 63 void gpu_local_max(T* gpuCenters, T* gpuVote, T final_t, unsigned int conn, unsigned int x, unsigned int y){
64 64  
65 65 unsigned int max_threads = stim::maxThreadsPerBlock();
66   - dim3 threads(max_threads, 1);
67   - dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
  66 + /*dim3 threads(max_threads, 1);
  67 + dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);*/
  68 + dim3 threads( sqrt(max_threads), sqrt(max_threads) );
  69 + dim3 blocks(x/threads.x + 1, y/threads.y + 1);
68 70  
69 71 //call the kernel to find the local maximum.
70 72 cuda_local_max <<< blocks, threads >>>(gpuCenters, gpuVote, final_t, conn, x, y);
... ...
stim/cuda/ivote/re_sample.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_RE_SAMPLE_H
  2 +#define STIM_CUDA_RE_SAMPLE_H
  3 +
  4 +#include <iostream>
  5 +#include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/templates/gaussian_blur.cuh>
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + template<typename T>
  13 + __global__ void cuda_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
  14 +
  15 + unsigned int sigma_ds = 1/resize;
  16 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  17 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  18 +
  19 +
  20 + // calculate the 2D coordinates for this current thread.
  21 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  22 + int yi = blockIdx.y;
  23 + // convert 2D coordinates to 1D
  24 + int i = yi * x + xi;
  25 +
  26 + if(xi< x && yi< y){
  27 + if(xi%sigma_ds==0){
  28 + if(yi%sigma_ds==0){
  29 + gpuI[i] = gpuI0[(yi/sigma_ds)*x_ds + xi/sigma_ds];
  30 + }
  31 + }
  32 + else gpuI[i] = 0;
  33 +
  34 + //int x_org = xi * sigma_ds ;
  35 + //int y_org = yi * sigma_ds ;
  36 + //int i_org = y_org * x + x_org;
  37 + //gpuI[i] = gpuI0[i_org];
  38 + }
  39 +
  40 + }
  41 +
  42 +
  43 + /// Applies a Gaussian blur to a 2D image stored on the GPU
  44 + template<typename T>
  45 + void gpu_re_sample(T* gpuI, T* gpuI0, T resize, unsigned int x, unsigned int y){
  46 +
  47 +
  48 + //unsigned int sigma_ds = 1/resize;
  49 + //unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  50 + //unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  51 +
  52 + //get the number of pixels in the image
  53 + //unsigned int pixels_ds = x_ds * y_ds;
  54 +
  55 + unsigned int max_threads = stim::maxThreadsPerBlock();
  56 + dim3 threads(max_threads, 1);
  57 + dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
  58 +
  59 + //stim::cuda::gpu_gaussian_blur2<float>(gpuI0, sigma_ds,x ,y);
  60 +
  61 + //resample the image
  62 + cuda_re_sample<float> <<< blocks, threads >>>(gpuI, gpuI0, resize, x, y);
  63 +
  64 + }
  65 +
  66 + /// Applies a Gaussian blur to a 2D image stored on the CPU
  67 + template<typename T>
  68 + void cpu_re_sample(T* out, T* in, T resize, unsigned int x, unsigned int y){
  69 +
  70 + //get the number of pixels in the image
  71 + unsigned int pixels = x*y;
  72 + unsigned int bytes = sizeof(T) * pixels;
  73 +
  74 + unsigned int sigma_ds = 1/resize;
  75 + unsigned int x_ds = (x/sigma_ds + (x %sigma_ds == 0 ? 0:1));
  76 + unsigned int y_ds = (y/sigma_ds + (y %sigma_ds == 0 ? 0:1));
  77 + unsigned int bytes_ds = sizeof(T) * x_ds * y_ds;
  78 +
  79 +
  80 +
  81 + //allocate space on the GPU for the original image
  82 + T* gpuI0;
  83 + cudaMalloc(&gpuI0, bytes_ds);
  84 +
  85 +
  86 + //copy the image data to the GPU
  87 + cudaMemcpy(gpuI0, in, bytes_ds, cudaMemcpyHostToDevice);
  88 +
  89 + //allocate space on the GPU for the down sampled image
  90 + T* gpuI;
  91 + cudaMalloc(&gpuI, bytes);
  92 +
  93 + //run the GPU-based version of the algorithm
  94 + gpu_re_sample<T>(gpuI, gpuI0, resize, x, y);
  95 +
  96 + //copy the image data to the GPU
  97 + cudaMemcpy(re_img, gpuI, bytes_ds, cudaMemcpyHostToDevice);
  98 +
  99 + cudaFree(gpuI0);
  100 + cudeFree(gpuI);
  101 + }
  102 +
  103 + }
  104 +}
  105 +
  106 +#endif
0 107 \ No newline at end of file
... ...
stim/cuda/ivote/update_dir_global.cuh renamed to stim/cuda/ivote/update_dir_bb.cuh
1   -#ifndef STIM_CUDA_UPDATE_DIR_GLOBALD_H
2   -#define STIM_CUDA_UPDATE_DIR_GLOBAL_H
  1 +#ifndef STIM_CUDA_UPDATE_DIR_BB_H
  2 +#define STIM_CUDA_UPDATE_DIR_BB_H
3 3  
4 4 # include <iostream>
5 5 # include <cuda.h>
... ... @@ -7,8 +7,7 @@
7 7 #include <stim/cuda/sharedmem.cuh>
8 8 #include <stim/visualization/aabb2.h>
9 9 #include <stim/visualization/colormap.h>
10   -#include <math.h>
11   -#include "cpyToshare.cuh"
  10 +#include <math.h>
12 11  
13 12 //#define RMAX_TEST 8
14 13  
... ... @@ -76,68 +75,6 @@ namespace stim{
76 75 gpuDir[i] = atan2((T)max_dy, (T)max_dx);
77 76 }
78 77  
79   - // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
80   - template<typename T>
81   - __global__ void leila_cuda_update_dir(T* gpuDir, T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){
82   -
83   -
84   - // calculate the 2D coordinates for this current thread.
85   - int xi = blockIdx.x * blockDim.x + threadIdx.x;
86   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
87   -
88   - if(xi >= x || yi >= y) return; //if the index is outside of the image, terminate the kernel
89   -
90   - int i = yi * x + xi; // convert 2D coordinates to 1D
91   -
92   - float theta = gpuGrad[2*i]; // calculate the voting direction based on the grtadient direction - global memory fetch
93   - gpuDir[i] = 0; //initialize the vote direction to zero
94   - float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter
95   - int id_x = 0; // define two local variables for the x and y position of the maximum
96   - int id_y = 0;
97   -
98   - int x_table = 2*rmax +1; // compute the size of window which will be checked for finding the voting area for this voter
99   - int rmax_sq = rmax * rmax;
100   - int tx_rmax = threadIdx.x + rmax;
101   - float atan_angle;
102   - float vote_c;
103   - int xidx, yidx, yr_sq, xr_sq;
104   - for(int yr = -rmax; yr <= rmax; yr++){
105   - yidx = yi + yr; //compute the index into the image
106   - if (yidx >= 0 && yidx < y){ //if the current y-index is inside the image
107   - yr_sq = yr * yr; //compute the square of yr, to save time later
108   - for(int xr = -rmax; xr <= rmax; xr++){
109   - xidx = xi + xr;
110   - if(xidx >= 0 && xidx < x){
111   - xr_sq = xr * xr;
112   - unsigned int ind_t = (rmax - yr) * x_table + rmax - xr;
113   -
114   - // calculate the angle between the voter and the current pixel in x and y directions
115   - atan_angle = gpuTable[ind_t];
116   - //atan_angle = atan2((T)yr, (T)xr);
117   -
118   - // check if the current pixel is located in the voting area of this voter.
119   - if (((xr_sq + yr_sq)< rmax_sq) && (abs(atan_angle - theta) <phi)){
120   -
121   - vote_c = gpuVote[yidx * x + xidx]; // find the vote value for the current counter
122   - // compare the vote value of this pixel with the max value to find the maxima and its index.
123   - if (vote_c>max) {
124   -
125   - max = vote_c;
126   - id_x = xr;
127   - id_y = yr;
128   - }
129   - }
130   - }
131   - }
132   - }
133   - }
134   -
135   - unsigned int ind_m = (rmax - id_y) * x_table + (rmax - id_x);
136   - float new_angle = gpuTable[ind_m];
137   -
138   - if(xi < x && yi < y)
139   - gpuDir[i] = new_angle;
140   - } //end kernel
141 78  
142 79  
143 80 // this kernel updates the gradient direction by the calculated voting direction.
... ... @@ -168,9 +105,7 @@ namespace stim{
168 105 HANDLE_ERROR( cudaMalloc(&gpuDir, bytes) );
169 106  
170 107 unsigned int max_threads = stim::maxThreadsPerBlock();
171   - //dim3 threads(min(x, max_threads), 1);
172   - //dim3 blocks(x/threads.x, y);
173   -
  108 +
174 109 dim3 threads( sqrt(max_threads), sqrt(max_threads) );
175 110 dim3 blocks(x/threads.x + 1, y/threads.y + 1);
176 111  
... ... @@ -188,12 +123,12 @@ namespace stim{
188 123  
189 124 //call the kernel to calculate the new voting direction
190 125 cuda_update_dir <<< blocks, threads, shared_mem_req>>>(gpuDir, gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
191   - stim::gpu2image<T>(gpuDir, "dir_david.bmp", x, y, -pi, pi, stim::cmBrewer);
  126 + //stim::gpu2image<T>(gpuDir, "dir_david.bmp", x, y, -pi, pi, stim::cmBrewer);
192 127  
193 128 //exit(0);
194 129  
195   - threads = dim3( sqrt(max_threads), sqrt(max_threads) );
196   - blocks = dim3(x/threads.x + 1, y/threads.y + 1);
  130 + //threads = dim3( sqrt(max_threads), sqrt(max_threads) );
  131 + //blocks = dim3(x/threads.x + 1, y/threads.y + 1);
197 132  
198 133 //call the kernel to update the gradient direction
199 134 cuda_update_grad <<< blocks, threads >>>(gpuGrad, gpuDir, x , y);
... ...
stim/cuda/ivote/david_update_dir_global.cuh renamed to stim/cuda/ivote/update_dir_threshold_global.cuh
1   -#ifndef STIM_CUDA_UPDATE_DIR_GLOBALD_H
2   -#define STIM_CUDA_UPDATE_DIR_GLOBAL_H
// include guard: the tested and defined macro names must match for the guard to work
#ifndef STIM_CUDA_UPDATE_DIR_THRESHOLD_GLOBAL_H
#define STIM_CUDA_UPDATE_DIR_THRESHOLD_GLOBAL_H
3 3  
4 4 # include <iostream>
5 5 # include <cuda.h>
6 6 #include <stim/cuda/cudatools.h>
7 7 #include <stim/cuda/sharedmem.cuh>
8   -#include <math.h>
9   -#include "cpyToshare.cuh"
10   -
11   -#define RMAX_TEST 8
  8 +#include "cpyToshare.cuh"
12 9  
13 10 namespace stim{
14 11 namespace cuda{
15 12  
16 13 // this kernel calculates the voting direction for the next iteration based on the angle between the location of this voter and the maximum vote value in its voting area.
17 14 template<typename T>
18   - __global__ void cuda_update_dir(T* gpuDir, T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){
19   - extern __shared__ T atan2_table[];
20   -
21   - //calculate the start point for this block
22   - //int bxi = blockIdx.x * blockDim.x;
23   -
24   - stim::cuda::sharedMemcpy(atan2_table, gpuTable, (2 * rmax + 1) * (2 * rmax + 1), threadIdx.x, blockDim.x);
  15 + __global__ void cuda_update_dir(T* gpuDir, T* gpuVote, T* gpuTh, T* gpuTable, T phi, int rmax, int th_size, int x, int y){
25 16  
26   - __syncthreads();
27 17  
28   - // calculate the 2D coordinates for this current thread.
29   - //int xi = bxi + threadIdx.x;
  18 +
  19 + // calculate the coordinate for this current thread.
30 20 int xi = blockIdx.x * blockDim.x + threadIdx.x;
31   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
32   - if(xi >= x || yi >= y) return; //if the index is outside of the image, terminate the kernel
33   -
34   - int i = yi * x + xi; // convert 2D coordinates to 1D
  21 + // calculate the voting direction based on the grtadient direction
  22 + float theta = gpuTh[3*xi];
35 23  
36   - float theta = gpuGrad[2*i]; // calculate the voting direction based on the grtadient direction - global memory fetch
37   - gpuDir[i] = 0; //initialize the vote direction to zero
38   - float max = 0; // define a local variable to maximum value of the vote image in the voting area for this voter
39   - int id_x = 0; // define two local variables for the x and y position of the maximum
40   - int id_y = 0;
  24 + //calculate the position and x, y coordinations of this voter in the original image
  25 + unsigned int i_v = gpuTh[3*xi+2];
  26 + unsigned int y_v = i_v/x;
  27 + unsigned int x_v = i_v - (y_v*x);
41 28  
42   - int x_table = 2*rmax +1; // compute the size of window which will be checked for finding the voting area for this voter
  29 + //initialize the vote direction to zero
  30 + gpuDir[xi] = 0;
  31 +
  32 + // define a local variable to maximum value of the vote image in the voting area for this voter
  33 + float max = 0;
  34 +
  35 + // define two local variables for the x and y coordinations where the maximum happened
  36 + int id_x = 0;
  37 + int id_y = 0;
  38 +
  39 + // compute the size of window which will be checked for finding the voting area for this voter
  40 + int x_table = 2*rmax +1;
43 41 int rmax_sq = rmax * rmax;
44 42 int tx_rmax = threadIdx.x + rmax;
45   - float atan_angle;
46   - float vote_c;
47   - unsigned int ind_t;
48   - for(int yr = -rmax; yr <= rmax; yr++){ //for each counter in the y direction
49   - if (yi+yr >= 0 && yi + yr < y){ //if the counter exists (we aren't looking outside of the image)
50   - for(int xr = -rmax; xr <= rmax; xr++){ //for each counter in the x direction
51   - if((xr * xr + yr *yr)< rmax_sq){ //if the counter is within range of the voter
52   -
53   - ind_t = (rmax - yr) * x_table + rmax - xr; //calculate the index to the atan2 table
54   - atan_angle = atan2_table[ind_t]; //retrieve the direction vector from the table
55   -
56   - //atan_angle = atan2((float)yr, (float)xr);
57   -
58   - if (abs(atan_angle - theta) <phi){ // check if the current pixel is located in the voting angle of this voter.
59   - vote_c = gpuVote[(yi+yr)*x + (xi+xr)]; // find the vote value for the current counter
60   - if(vote_c>max) { // compare the vote value of this pixel with the max value to find the maxima and its index.
61   - max = vote_c;
62   - id_x = xr;
63   - id_y = yr;
64   - }
  43 + if(xi < th_size){
  44 +
  45 + for(int yr = -rmax; yr <= rmax; yr++){
  46 +
  47 + for(int xr = -rmax; xr <= rmax; xr++){
  48 +
  49 + unsigned int ind_t = (rmax - yr) * x_table + rmax - xr;
  50 +
  51 + // find the angle between the voter and the current pixel in x and y directions
  52 + float atan_angle = gpuTable[ind_t];
  53 +
  54 + // check if the current pixel is located in the voting area of this voter.
  55 + if (((xr * xr + yr *yr)< rmax_sq) && (abs(atan_angle - theta) <phi)){
  56 + // find the vote value for the current counter
  57 + float vote_c = gpuVote[(y_v+yr)*x + (x_v+xr)];
  58 + // compare the vote value of this pixel with the max value to find the maxima and its index.
  59 + if (vote_c>max) {
  60 +
  61 + max = vote_c;
  62 + id_x = xr;
  63 + id_y = yr;
65 64 }
66 65 }
67 66 }
68 67 }
69   - }
  68 +
70 69  
71   - unsigned int ind_m = (rmax - id_y) * x_table + (rmax - id_x);
72   - float new_angle = gpuTable[ind_m];
  70 + unsigned int ind_m = (rmax - id_y) * x_table + (rmax - id_x);
  71 + float new_angle = gpuTable[ind_m];
  72 + gpuDir[xi] = new_angle;
  73 + }
73 74  
74   - if(xi < x && yi < y)
75   - gpuDir[i] = new_angle;
76   - } //end kernel
  75 + }
77 76  
78 77 // this kernel updates the gradient direction by the calculated voting direction.
79 78 template<typename T>
80   - __global__ void cuda_update_grad(T* gpuGrad, T* gpuDir, int x, int y){
  79 + __global__ void cuda_update_grad(T* gpuTh, T* gpuDir, int th_size, int x, int y){
81 80  
82   - // calculate the 2D coordinates for this current thread.
  81 + // calculate the coordinate for this current thread.
83 82 int xi = blockIdx.x * blockDim.x + threadIdx.x;
84   - int yi = blockIdx.y * blockDim.y + threadIdx.y;
85   -
86   - // convert 2D coordinates to 1D
87   - int i = yi * x + xi;
88 83  
  84 +
89 85 //update the gradient image with the vote direction
90   - gpuGrad[2*i] = gpuDir[i];
  86 + gpuTh[3*xi] = gpuDir[xi];
91 87 }
92 88  
93 89 template<typename T>
94   - void gpu_update_dir(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
95   -
96   -
  90 + void gpu_update_dir(T* gpuVote, T* gpuTh, T* gpuTable, T phi, unsigned int rmax, unsigned int th_size, unsigned int x, unsigned int y){
97 91  
98 92 //calculate the number of bytes in the array
99   - unsigned int bytes = x * y * sizeof(T);
  93 + unsigned int bytes_th = th_size* sizeof(T);
100 94  
101 95 unsigned int max_threads = stim::maxThreadsPerBlock();
102   -
103   - dim3 threads(sqrt(max_threads), sqrt(max_threads));
104   - dim3 blocks(x/threads.x + 1, y/threads.y + 1);
105   -
106   -
  96 + dim3 threads(max_threads);
  97 + dim3 blocks(th_size/threads.x+1);
107 98  
108 99 // allocate space on the GPU for the updated vote direction
109 100 T* gpuDir;
110   - cudaMalloc(&gpuDir, bytes);
111   -
112   - size_t shared_mem = sizeof(T) * std::pow((2 * rmax + 1), 2);
113   - std::cout<<"Shared memory for atan2 table: "<<shared_mem<<std::endl;
  101 + cudaMalloc(&gpuDir, bytes_th);
114 102  
115 103 //call the kernel to calculate the new voting direction
116   - cuda_update_dir <<< blocks, threads, shared_mem>>>(gpuDir, gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  104 + cuda_update_dir <<< blocks, threads>>>(gpuDir, gpuVote, gpuTh, gpuTable, phi, rmax, th_size, x , y);
117 105  
118 106 //call the kernel to update the gradient direction
119   - cuda_update_grad <<< blocks, threads >>>(gpuGrad, gpuDir, x , y);
  107 + cuda_update_grad <<< blocks, threads >>>(gpuTh, gpuDir, th_size, x , y);
120 108  
121 109 //free allocated memory
122 110 cudaFree(gpuDir);
... ...
stim/cuda/ivote/vote_atomic_bb.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE_ATOMIC_BB_H
  2 +#define STIM_CUDA_VOTE_ATOMIC_BB_H
  3 +
  4 +# include <iostream>
  5 +# include <cuda.h>
  6 +#include <stim/cuda/cudatools.h>
  7 +#include <stim/cuda/sharedmem.cuh>
  8 +#include <stim/visualization/aabb2.h>
  9 +#include <stim/visualization/colormap.h>
  10 +#include <math.h>
  11 +
  12 +namespace stim{
  13 + namespace cuda{
  14 +
  15 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  16 + template<typename T>
  17 + __global__ void cuda_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){
  18 +
  19 + extern __shared__ T S[];
  20 + T* shared_atan = S;
  21 + size_t n_table = (rmax * 2 + 1) * (rmax * 2 + 1);
  22 + stim::cuda::threadedMemcpy((char*)shared_atan, (char*)gpuTable, sizeof(T) * n_table, threadIdx.x, blockDim.x);
  23 +
  24 + // calculate the 2D coordinates for this current thread.
  25 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  26 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
  27 +
  28 + if(xi >= x || yi >= y) return;
  29 + // convert 2D coordinates to 1D
  30 + int i = yi * x + xi;
  31 +
  32 + // calculate the voting direction based on the grtadient direction
  33 + float theta = gpuGrad[2*i];
  34 + //calculate the amount of vote for the voter
  35 + float mag = gpuGrad[2*i + 1];
  36 +
  37 +
  38 + stim::aabb2<int> bb(xi, yi); //initialize a bounding box at the current point
  39 + bb.insert(xi + ceil(rmax * cos(theta)), ceil(yi + rmax * sin(theta)));
  40 + bb.insert(xi + ceil(rmax * cos(theta - phi)), yi + ceil(rmax * sin(theta - phi))); //insert one corner of the triangle into the bounding box
  41 + bb.insert(xi + ceil(rmax * cos(theta + phi)), yi + ceil(rmax * sin(theta + phi))); //insert the final corner into the bounding box
  42 +
  43 + // compute the size of window which will be checked for finding the proper voters for this pixel
  44 + int x_table = 2*rmax +1;
  45 + int rmax_sq = rmax * rmax;
  46 +
  47 + int lut_i;
  48 + T dx_sq, dy_sq;
  49 +
  50 + bb.trim_low(0, 0); //make sure the bounding box doesn't go outside the image
  51 + bb.trim_high(x-1, y-1);
  52 +
  53 + int by, bx;
  54 + int dx, dy;
  55 +
  56 + unsigned int ind_g; //initialize the maximum vote value to zero
  57 + T alpha;
  58 +
  59 + for(by = bb.low[1]; by <= bb.high[1]; by++){ //for each element in the bounding box
  60 + dy = by - yi; //calculate the y coordinate of the current point relative to yi
  61 + dy_sq = dy * dy;
  62 + for(bx = bb.low[0]; bx <= bb.high[0]; bx++){
  63 + dx = bx - xi;
  64 + dx_sq = dx * dx;
  65 + lut_i = (rmax - dy) * x_table + rmax - dx;
  66 + alpha = shared_atan[lut_i];
  67 + if(dx_sq + dy_sq < rmax_sq && abs(alpha - theta) < phi){
  68 + ind_g = (by)*x + (bx);
  69 + atomicAdd(&gpuVote[ind_g], mag);
  70 +
  71 + }
  72 + }
  73 + }
  74 +
  75 + }
  76 +
  77 +
  78 + template<typename T>
  79 + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  80 +
  81 +
  82 + unsigned int max_threads = stim::maxThreadsPerBlock();
  83 + dim3 threads( sqrt(max_threads), sqrt(max_threads) );
  84 + dim3 blocks(x/threads.x + 1, y/threads.y + 1);
  85 + size_t table_bytes = sizeof(T) * (rmax * 2 + 1) * (rmax * 2 + 1);
  86 + size_t shared_mem_req = table_bytes;// + template_bytes;
  87 + std::cout<<"Shared Memory required: "<<shared_mem_req<<std::endl;
  88 + size_t shared_mem = stim::sharedMemPerBlock();
  89 + if(shared_mem_req > shared_mem){
  90 + std::cout<<"Error: insufficient shared memory for this implementation of cuda_update_dir()."<<std::endl;
  91 + exit(1);
  92 + }
  93 +
  94 + //call the kernel to do the voting
  95 + cuda_vote <<< blocks, threads, shared_mem_req>>>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  96 +
  97 + }
  98 +
  99 +
  100 + template<typename T>
  101 + void cpu_vote(T* cpuVote, T* cpuGrad,T* cpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  102 +
  103 + //calculate the number of bytes in the array
  104 + unsigned int bytes = x * y * sizeof(T);
  105 +
  106 + //calculate the number of bytes in the atan2 table
  107 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(T);
  108 +
  109 + //allocate space on the GPU for the Vote Image
  110 + T* gpuVote;
  111 + cudaMalloc(&gpuVote, bytes);
  112 +
  113 + //allocate space on the GPU for the input Gradient image
  114 + T* gpuGrad;
  115 + HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes*2));
  116 +
  117 + //copy the Gradient Magnitude data to the GPU
  118 + HANDLE_ERROR(cudaMemcpy(gpuGrad, cpuGrad, bytes*2, cudaMemcpyHostToDevice));
  119 +
  120 + //allocate space on the GPU for the atan2 table
  121 + T* gpuTable;
  122 + HANDLE_ERROR(cudaMalloc(&gpuTable, bytes_table));
  123 +
  124 + //copy the atan2 values to the GPU
  125 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice));
  126 +
  127 + //call the GPU version of the vote calculation function
  128 + gpu_vote<T>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  129 +
  130 + //copy the Vote Data back to the CPU
  131 + cudaMemcpy(cpuVote, gpuVote, bytes, cudaMemcpyDeviceToHost) ;
  132 +
  133 + //free allocated memory
  134 + cudaFree(gpuTable);
  135 + cudaFree(gpuVote);
  136 + cudaFree(gpuGrad);
  137 + }
  138 +
  139 + }
  140 +}
  141 +
  142 +#endif
0 143 \ No newline at end of file
... ...
stim/cuda/ivote/vote_atomic_shared.cuh
... ... @@ -5,7 +5,7 @@
5 5 # include <cuda.h>
6 6 #include <stim/cuda/cudatools.h>
7 7 #include <stim/cuda/sharedmem.cuh>
8   -#include "cpyToshare.cuh"
  8 +
9 9 //#include "writebackshared.cuh"
10 10 namespace stim{
11 11 namespace cuda{
... ...
stim/cuda/ivote/vote_shared.cuh 0 → 100644
// include guard: the tested and defined macro names must match for the guard to work
#ifndef STIM_CUDA_VOTE_SHARED_H
#define STIM_CUDA_VOTE_SHARED_H
  3 +# include <iostream>
  4 +# include <cuda.h>
  5 +#include <stim/cuda/cudatools.h>
  6 +#include <stim/cuda/sharedmem.cuh>
  7 +#include "cpyToshare.cuh"
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  13 + template<typename T>
  14 + __global__ void cuda_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, int rmax, int x, int y){
  15 +
  16 + //generate a pointer to shared memory (size will be specified as a kernel parameter)
  17 + extern __shared__ float s_grad[];
  18 +
  19 + //calculate the start point for this block
  20 + int bxi = blockIdx.x * blockDim.x;
  21 +
  22 + // calculate the 2D coordinates for this current thread.
  23 + int xi = bxi + threadIdx.x;
  24 + int yi = blockIdx.y * blockDim.y + threadIdx.y;
  25 + // convert 2D coordinates to 1D
  26 + int i = yi * x + xi;
  27 +
  28 + // define a local variable to sum the votes from the voters
  29 + float sum = 0;
  30 +
  31 + //calculate the width of the shared memory block
  32 + int swidth = 2 * rmax + blockDim.x;
  33 +
  34 + // compute the size of window which will be checked for finding the proper voters for this pixel
  35 + int x_table = 2*rmax +1;
  36 + int rmax_sq = rmax * rmax;
  37 + int tx_rmax = threadIdx.x + rmax;
  38 + int bxs = bxi - rmax;
  39 +
  40 + //for every line (along y)
  41 + for(int yr = -rmax; yr <= rmax; yr++){
  42 + if (yi+yr<y && yi+yr>=0){
  43 + //copy the portion of the image necessary for this block to shared memory
  44 + __syncthreads();
  45 + cpyG2S1D2ch<float>(s_grad, gpuGrad, bxs, yi + yr , 2*swidth, 1, threadIdx, blockDim, x, y);
  46 + __syncthreads();
  47 +
  48 + if(xi < x && yi < y){
  49 +
  50 + for(int xr = -rmax; xr <= rmax; xr++){
  51 +
  52 + //find the location of this voter in the atan2 table
  53 + int id_t = (yr + rmax) * x_table + xr + rmax;
  54 +
  55 + // calculate the angle between the pixel and the current voter in x and y directions
  56 + float atan_angle = gpuTable[id_t];
  57 +
  58 + // calculate the voting direction based on the grtadient direction
  59 + int idx_share = xr + tx_rmax ;
  60 + float theta = s_grad[idx_share*2];
  61 + float mag = s_grad[idx_share*2 + 1];
  62 +
  63 +
  64 + // check if the current voter is located in the voting area of this pixel.
  65 + if (((xr * xr + yr *yr)< rmax_sq) && (abs(atan_angle - theta) <phi)){
  66 + sum += mag;
  67 +
  68 + }
  69 + }
  70 +
  71 + }
  72 + }
  73 + }
  74 + if(xi < x && yi < y)
  75 + gpuVote[i] = sum;
  76 +
  77 + }
  78 +
  79 + template<typename T>
  80 + void gpu_vote(T* gpuVote, T* gpuGrad, T* gpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  81 +
  82 +
  83 + unsigned int max_threads = stim::maxThreadsPerBlock();
  84 + dim3 threads(max_threads, 1);
  85 + dim3 blocks(x/threads.x + (x %threads.x == 0 ? 0:1) , y);
  86 +
  87 +
  88 + // specify share memory
  89 + unsigned int share_bytes = (2*rmax + threads.x)*1*2*sizeof(T);
  90 +
  91 + //call the kernel to do the voting
  92 + cuda_vote <<< blocks, threads,share_bytes >>>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  93 +
  94 + }
  95 +
  96 +
  97 + template<typename T>
  98 + void cpu_vote(T* cpuVote, T* cpuGrad,T* cpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){
  99 +
  100 + //calculate the number of bytes in the array
  101 + unsigned int bytes = x * y * sizeof(T);
  102 +
  103 + //calculate the number of bytes in the atan2 table
  104 + unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(T);
  105 +
  106 + //allocate space on the GPU for the Vote Image
  107 + T* gpuVote;
  108 + cudaMalloc(&gpuVote, bytes);
  109 +
  110 + //allocate space on the GPU for the input Gradient image
  111 + T* gpuGrad;
  112 + HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes*2));
  113 +
  114 + //copy the Gradient Magnitude data to the GPU
  115 + HANDLE_ERROR(cudaMemcpy(gpuGrad, cpuGrad, bytes*2, cudaMemcpyHostToDevice));
  116 +
  117 + //allocate space on the GPU for the atan2 table
  118 + T* gpuTable;
  119 + HANDLE_ERROR(cudaMalloc(&gpuTable, bytes_table));
  120 +
  121 + //copy the atan2 values to the GPU
  122 + HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice));
  123 +
  124 + //call the GPU version of the vote calculation function
  125 + gpu_vote<T>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);
  126 +
  127 + //copy the Vote Data back to the CPU
  128 + cudaMemcpy(cpuVote, gpuVote, bytes, cudaMemcpyDeviceToHost) ;
  129 +
  130 + //free allocated memory
  131 + cudaFree(gpuTable);
  132 + cudaFree(gpuVote);
  133 + cudaFree(gpuGrad);
  134 + }
  135 +
  136 + }
  137 +}
  138 +
  139 +#endif
0 140 \ No newline at end of file
... ...
stim/cuda/ivote/vote_threshold_global.cuh 0 → 100644
  1 +#ifndef STIM_CUDA_VOTE_THRESHOLD_GLOBAL_H
  2 +#define STIM_CUDA_VOTE_THRESHOLD_GLOBAL_H
  3 +# include <iostream>
  4 +# include <cuda.h>
  5 +#include <stim/cuda/cudatools.h>
  6 +#include <stim/cuda/sharedmem.cuh>
  7 +#include "cpyToshare.cuh"
  8 +
  9 +namespace stim{
  10 + namespace cuda{
  11 +
  12 + // this kernel calculates the vote value by adding up the gradient magnitudes of every voter that this pixel is located in their voting area
  13 + template<typename T>
  14 + __global__ void cuda_vote(T* gpuVote, T* gpuTh, T* gpuTable, T phi, int rmax, int th_size, int x, int y){
  15 +
  16 +
  17 + // calculate the x coordinate for this current thread.
  18 + int xi = blockIdx.x * blockDim.x + threadIdx.x;
  19 +
  20 + // calculate the voting direction based on the grtadient direction
  21 + float theta = gpuTh[3*xi];
  22 + //find the gradient magnitude for the current voter
  23 + float mag = gpuTh[3*xi + 1];
  24 + //calculate the position and x, y coordinations of this voter in the original image
  25 + unsigned int i_v = gpuTh[3*xi+2];
  26 + unsigned int y_v = i_v/x;
  27 + unsigned int x_v = i_v - (y_v*x);
  28 +
  29 + // compute the size of window which will be checked for finding the proper voters for this pixel
  30 + int x_table = 2*rmax +1;
  31 + int rmax_sq = rmax * rmax;
  32 + if(xi < th_size){
  33 + for(int yr = -rmax; yr <= rmax; yr++){
  34 + for(int xr = -rmax; xr <= rmax; xr++){
  35 + if ((y_v+yr)>=0 && (y_v+yr)<y && (x_v+xr)>=0 && (x_v+xr)<x){
  36 +
  37 + //find the location of the current pixel in the atan2 table
  38 + unsigned int ind_t = (rmax - yr) * x_table + rmax - xr;
  39 +
  40 + // calculate the angle between the voter and the current pixel in x and y directions
  41 + float atan_angle = gpuTable[ind_t];
  42 +
  43 + // check if the current pixel is located in the voting area of this voter.
  44 + if (((xr * xr + yr *yr)< rmax_sq) && (abs(atan_angle - theta) <phi)){
  45 + // calculate the 1D index for the current pixel in global memory
  46 + unsigned int ind_g = (y_v+yr)*x + (x_v+xr);
  47 + atomicAdd(&gpuVote[ind_g], mag);
  48 +
  49 + }
  50 + }
  51 + }
  52 + }
  53 + }
  54 + }
  55 +
  56 + template<typename T>
  57 + void gpu_vote(T* gpuVote, T* gpuTh, T* gpuTable, T phi, unsigned int rmax, unsigned int th_size, unsigned int x, unsigned int y){
  58 +
  59 +
  60 + unsigned int max_threads = stim::maxThreadsPerBlock();
  61 + dim3 threads(max_threads);
  62 + dim3 blocks(th_size/threads.x + 1);
  63 +
  64 + //call the kernel to do the voting
  65 + cuda_vote <<< blocks, threads>>>(gpuVote, gpuTh, gpuTable, phi, rmax, th_size, x , y);
  66 +
  67 + }
  68 +
  69 +
		/// Host-side wrapper that copies its inputs to the GPU, calls gpu_vote, and copies the vote image back.
		/// NOTE(review): gpu_vote in this file is declared with eight parameters (including th_size), but the
		///               call below passes seven, so this template cannot be instantiated as written.
		/// NOTE(review): the kernel in this file accumulates into gpuVote with atomicAdd, but gpuVote is never
		///               cleared after cudaMalloc, and it indexes its input with three values per voter while
		///               only bytes*2 are allocated and copied here. Confirm the intended inputs before use.
		/// @param cpuVote host pointer that receives x*y vote values
		/// @param cpuGrad host pointer to the input data copied to the device (2*x*y values)
		/// @param cpuTable host pointer to the (2*rmax+1) x (2*rmax+1) atan2 table
		/// @param phi angular half-width of the voting area
		/// @param rmax maximum voting radius in pixels
		/// @param x image width in pixels
		/// @param y image height in pixels
		template<typename T>
		void cpu_vote(T* cpuVote, T* cpuGrad,T* cpuTable, T phi, unsigned int rmax, unsigned int x, unsigned int y){

			//calculate the number of bytes in the array
			unsigned int bytes = x * y * sizeof(T);

			//calculate the number of bytes in the atan2 table
			unsigned int bytes_table = (2*rmax+1) * (2*rmax+1) * sizeof(T);

			//allocate space on the GPU for the Vote Image (the result of this allocation is not checked)
			T* gpuVote;
			cudaMalloc(&gpuVote, bytes);

			//allocate space on the GPU for the input Gradient image
			T* gpuGrad;
			HANDLE_ERROR(cudaMalloc(&gpuGrad, bytes*2));

			//copy the Gradient Magnitude data to the GPU
			HANDLE_ERROR(cudaMemcpy(gpuGrad, cpuGrad, bytes*2, cudaMemcpyHostToDevice));

			//allocate space on the GPU for the atan2 table
			T* gpuTable;
			HANDLE_ERROR(cudaMalloc(&gpuTable, bytes_table));

			//copy the atan2 values to the GPU
			HANDLE_ERROR(cudaMemcpy(gpuTable, cpuTable, bytes_table, cudaMemcpyHostToDevice));

			//call the GPU version of the vote calculation function (see the review notes above: argument count mismatch)
			gpu_vote<T>(gpuVote, gpuGrad, gpuTable, phi, rmax, x , y);

			//copy the Vote Data back to the CPU (the result of this copy is not checked)
			cudaMemcpy(cpuVote, gpuVote, bytes, cudaMemcpyDeviceToHost) ;

			//free allocated memory
			cudaFree(gpuTable);
			cudaFree(gpuVote);
			cudaFree(gpuGrad);
		}
  108 +
  109 + }
  110 +}
  111 +
  112 +#endif
0 113 \ No newline at end of file
... ...
stim/cuda/ivote_atomic.cuh renamed to stim/cuda/ivote_atomic_bb.cuh
1   -#ifndef STIM_CUDA_IVOTE_ATOMIC_H
2   -#define STIM_CUDA_IVOTE_ATOMIC_H
  1 +#ifndef STIM_CUDA_IVOTE_ATOMIC_BB_H
  2 +#define STIM_CUDA_IVOTE_ATOMIC_BB_H
3 3  
4 4 #include <stim/cuda/ivote/down_sample.cuh>
5 5 #include <stim/cuda/ivote/local_max.cuh>
6   -#include <stim/cuda/ivote/update_dir_global.cuh>
7   -//#include <stim/cuda/ivote/vote_shared_32-32.cuh>
8   -#include <stim/cuda/ivote/vote_atomic_shared.cuh>
9   -//#include <stim/cuda/ivote/re_sample.cuh>
  6 +#include <stim/cuda/ivote/update_dir_bb.cuh>
  7 +#include <stim/cuda/ivote/vote_atomic_bb.cuh>
  8 +
10 9 namespace stim{
11 10 namespace cuda{
12 11  
... ...
stim/envi/agilent_binary.h
... ... @@ -35,26 +35,28 @@ public:
35 35 void alloc(){
36 36 ptr = (T*) malloc(bytes());
37 37 }
38   - void alloc(short x, short y, short z){
  38 + void alloc(size_t x, size_t y, size_t z){
39 39 R[0] = x;
40 40 R[1] = y;
41 41 R[2] = z;
42 42 alloc();
43 43 }
44 44  
  45 + /// Create a deep copy of an agileng_binary object
45 46 void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){
46 47 dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory
47 48 memcpy(dst->ptr, src->ptr, bytes()); //copy the data
48 49 memcpy(dst->Z, src->Z, sizeof(double) * 2); //copy the data z range
49 50 }
50 51  
	/// Default constructor, sets the resolution to zero and the data pointer to NULL
	agilent_binary(){
		memset(R, 0, sizeof(size_t) * 3);			//set the resolution to zero (assumes R holds three size_t values - TODO confirm against the declaration of R)
		ptr = NULL;									//no buffer is allocated until alloc() is called
	}
55 57  
56 58 /// Constructor with resolution
57   - agilent_binary(short x, short y, short z){
  59 + agilent_binary(size_t x, size_t y, size_t z){
58 60 alloc(x, y, z);
59 61 }
60 62  
... ... @@ -109,13 +111,11 @@ public:
109 111  
110 112 char zero = 0;
111 113 for(size_t i = 0; i < 9; i++) outfile.write(&zero, 1); //write 9 zeros
112   - outfile.write((char*)&R[0], 2);
  114 + outfile.write((char*)&R[2], 2);
113 115 for(size_t i = 0; i < 13; i++) outfile.write(&zero, 1); //write 13 zeros
  116 + outfile.write((char*)&R[0], 2);
114 117 outfile.write((char*)&R[1], 2);
115   - outfile.write((char*)&R[2], 2);
116 118 for(size_t i = 0; i < 992; i++) outfile.write(&zero, 1); //write 992 zeros
117   - //char zerovec[1020];
118   - //outfile.write((char*)zerovec, 1020);
119 119  
120 120 size_t b = bytes();
121 121 outfile.write((char*)ptr, b); //write the data to the output file
... ... @@ -149,7 +149,7 @@ public:
149 149  
150 150 #ifdef CUDA_FOUND
151 151 /// Perform an FFT and return a binary file with bands in the specified range
152   - agilent_binary<T> fft(float band_min, float band_max){
  152 + agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){
153 153 auto total_start = std::chrono::high_resolution_clock::now();
154 154  
155 155 auto start = std::chrono::high_resolution_clock::now();
... ... @@ -177,8 +177,8 @@ public:
177 177  
178 178 start = std::chrono::high_resolution_clock::now();
179 179 int N[1]; //create an array with the interferogram size (required for cuFFT input)
180   - N[0] = R[2]; //set the only array value to the interferogram size
181   - if(cufftPlanMany(&plan, 1, N, NULL, 1, R[2], NULL, 1, R[2], CUFFT_R2C, batch) != CUFFT_SUCCESS){
  180 + N[0] = (int)R[2]; //set the only array value to the interferogram size
  181 + if(cufftPlanMany(&plan, 1, N, NULL, 1, (int)R[2], NULL, 1, (int)R[2], CUFFT_R2C, (int)batch) != CUFFT_SUCCESS){
182 182 std::cout<<"cuFFT Error: unable to create 1D plan."<<std::endl;
183 183 exit(1);
184 184 }
... ... @@ -199,12 +199,13 @@ public:
199 199 std::complex<T>* cpu_fft = (std::complex<T>*) malloc( R[0] * R[1] * (R[2]/2+1) * sizeof(std::complex<T>) );
200 200 HANDLE_ERROR(cudaMemcpy(cpu_fft, gpu_fft, R[0] * R[1] * (R[2]/2+1) * sizeof(cufftComplex), cudaMemcpyDeviceToHost)); //copy data from the host to the device
201 201  
202   - double int_delta = 0.00012656; //interferogram sample spacing in centimeters
  202 + //double int_delta = 0.00012656; //interferogram sample spacing in centimeters
  203 + double int_delta = (1.0 / ELWN) * ((double)UDR / 2.0); //calculate the interferogram spacing
203 204 double int_length = int_delta * R[2]; //interferogram length in centimeters
204 205 double fft_delta = 1/int_length; //spectrum spacing (in inverse centimeters, wavenumber
205 206  
206   - size_t start_i = std::ceil(band_min / fft_delta); //calculate the first band to store
207   - size_t size_i = std::floor(band_max / fft_delta) - start_i; //calculate the number of bands to store
  207 + size_t start_i = (size_t)std::ceil(band_min / fft_delta); //calculate the first band to store
  208 + size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i; //calculate the number of bands to store
208 209 size_t end_i = start_i + size_i; //last band number
209 210 agilent_binary<T> result(R[0], R[1], size_i);
210 211 result.Z[0] = start_i * fft_delta; //set the range for the FFT result
... ...
stim/envi/bil.h
... ... @@ -1309,6 +1309,66 @@ public:
1309 1309  
1310 1310 }
1311 1311  
  1312 + bool multiply(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1313 + unsigned long long B = Z(); //calculate the number of bands
  1314 + unsigned long long ZX = Z() * X();
  1315 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1316 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1317 + unsigned long long L = ZX * sizeof(T);
  1318 +
  1319 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1320 +
  1321 + T * c; //pointer to the current ZX slice
  1322 + c = (T*)malloc( L ); //allocate space for the slice
  1323 +
  1324 + for(unsigned long long j = 0; j < Y(); j++){ //for each line
  1325 + read_plane_y(c, j); //load the line into memory
  1326 + for(unsigned long long i = 0; i < B; i++){ //for each band
  1327 + for(unsigned long long m = 0; m < X(); m++){ //for each sample
  1328 + if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked
  1329 + c[m + i * X()] *= (T)v;
  1330 + }
  1331 + }
  1332 + target.write(reinterpret_cast<const char*>(c), L); //write normalized data into destination
  1333 +
  1334 + if(PROGRESS) progress = (double)(j+1) / Y() * 100; //update the progress
  1335 + }
  1336 +
  1337 + free(c); //free the slice memory
  1338 + target.close(); //close the output file
  1339 + return true;
  1340 + }
  1341 +
  1342 + bool add(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1343 + unsigned long long B = Z(); //calculate the number of bands
  1344 + unsigned long long ZX = Z() * X();
  1345 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1346 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1347 + unsigned long long L = ZX * sizeof(T);
  1348 +
  1349 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1350 +
  1351 + T * c; //pointer to the current ZX slice
  1352 + c = (T*)malloc( L ); //allocate space for the slice
  1353 +
  1354 + for(unsigned long long j = 0; j < Y(); j++){ //for each line
  1355 + read_plane_y(c, j); //load the line into memory
  1356 + for(unsigned long long i = 0; i < B; i++){ //for each band
  1357 + for(unsigned long long m = 0; m < X(); m++){ //for each sample
  1358 + if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked
  1359 + c[m + i * X()] += (T)v;
  1360 + }
  1361 + }
  1362 + target.write(reinterpret_cast<const char*>(c), L); //write normalized data into destination
  1363 +
  1364 + if(PROGRESS) progress = (double)(j+1) / Y() * 100; //update the progress
  1365 + }
  1366 +
  1367 + free(c); //free the slice memory
  1368 + target.close(); //close the output file
  1369 + return true;
  1370 + }
  1371 +
1312 1372 /// Close the file.
1313 1373 bool close(){
1314 1374 file.close();
... ...
stim/envi/binary.h
... ... @@ -7,6 +7,13 @@
7 7 #include "../math/vector.h"
8 8 #include <fstream>
9 9 #include <sys/stat.h>
  10 +#include <cstring>
  11 +
  12 +#ifdef _WIN32
  13 +#include <Windows.h>
  14 +#else
  15 +#include <unistd.h>
  16 +#endif
10 17  
11 18 namespace stim{
12 19  
... ... @@ -30,14 +37,16 @@ protected:
30 37  
31 38 double progress; //stores the progress on the current operation (accessible using a thread)
32 39  
  40 + size_t buffer_size; //available memory for processing large files
33 41  
34 42 /// Private initialization function used to set default parameters in the data structure.
35 43 void init(){
36   - memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero
37   - header = 0; //initialize the header size to zero
  44 + std::memset(R, 0, sizeof(unsigned long long) * D); //initialize the resolution to zero
  45 + header = 0; //initialize the header size to zero
38 46 mask = NULL;
39 47  
40 48 progress = 0;
  49 + set_buffer(); //set the maximum buffer size to the default
41 50 }
42 51  
43 52 /// Private helper function that returns the size of the file on disk using system functions.
... ... @@ -105,6 +114,11 @@ protected:
105 114  
106 115 public:
107 116  
  117 + //default constructor
  118 + binary(){
  119 + init();
  120 + }
  121 +
108 122 double get_progress(){
109 123 return progress;
110 124 }
... ... @@ -113,6 +127,20 @@ public:
113 127 progress = 0;
114 128 }
115 129  
  130 + //specify the maximum fraction of available memory that this class will use for buffering
  131 + void set_buffer(double mem_frac = 0.5){ //default to 50%
  132 +#ifdef _WIN32
  133 + MEMORYSTATUSEX statex;
  134 + statex.dwLength = sizeof (statex);
  135 + GlobalMemoryStatusEx (&statex);
  136 + buffer_size = (size_t)(statex.ullAvailPhys * mem_frac);
  137 +#else
  138 + size_t pages = sysconf(_SC_PHYS_PAGES);
  139 + size_t page_size = sysconf(_SC_PAGE_SIZE);
  140 + buffer_size = (size_t)(pages * page_size * mem_frac);
  141 +#endif
  142 + }
  143 +
116 144 /// Open a binary file for streaming.
117 145  
118 146 /// @param filename is the name of the binary file
... ... @@ -375,6 +403,96 @@ public:
375 403 return read_pixel(p, i);
376 404 }
377 405  
  406 + /// Reads a block specified by an (x, y, z) position and size using the largest possible contiguous reads
  407 + bool read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){
  408 +
  409 + size_t size_bytes = sx * sy * sz * sizeof(T); //size of the block to read in bytes
  410 +
  411 + size_t start = z * R[0] * R[1] + y * R[0] + x; //calculate the start postion
  412 + size_t start_bytes = start * sizeof(T); //start position in bytes
  413 + file.seekg(start * sizeof(T), std::ios::beg); //seek to the start position
  414 +
  415 +
  416 + if(sx == R[0] && sy == R[1]){ //if sx and sy result in a contiguous volume along z
  417 + file.read((char*)dest, size_bytes); //read the block in one pass
  418 + return true;
  419 + }
  420 +
  421 + if(sx == R[0]){ //if sx is contiguous, read each z-axis slice can be read in one pass
  422 + size_t jump_bytes = (R[1] - sy) * R[0] * sizeof(T); //jump between each slice
  423 + size_t slice_bytes = sx * sy * sizeof(T); //size of the slice to be read
  424 + for(size_t zi = 0; zi < sz; zi++){ //for each z-axis slice
  425 + file.read((char*)dest, slice_bytes); //read the slice
  426 + dest += sx * sy; //move the destination pointer to the next slice
  427 + file.seekg(jump_bytes, std::ios::cur); //skip to the next slice in the file
  428 + }
  429 + return true;
  430 + }
  431 +
  432 + //in this case, x is not contiguous so the volume must be read line-by-line
  433 + size_t jump_x_bytes = (R[0] - sx) * sizeof(T); //number of bytes skipped in the x direction
  434 + size_t jump_y_bytes = (R[1] - sy) * R[0] * sizeof(T) + jump_x_bytes; //number of bytes skipped between slices
  435 + size_t line_bytes = sx * sizeof(T); //size of the line to be read
  436 + size_t zi, yi;
  437 + for(zi = 0; zi < sz; zi++){ //for each slice
  438 + file.read((char*)dest, line_bytes); //read the first line
  439 + for(yi = 1; yi < sy; yi++){ //read each additional line
  440 + dest += sx; //move the pointer in the destination block to the next line
  441 + file.seekg(jump_x_bytes, std::ios::cur); //skip to the next line in the file
  442 + file.read((char*)dest, line_bytes); //read the line to the destination block
  443 + }
  444 + file.seekg(jump_y_bytes, std::ios::cur); //skip to the beginning of the next slice
  445 + }
  446 + return false;
  447 + }
  448 +
  449 + // permutes a block of data from the current interleave to the interleave specified (re-arranged dimensions to the order specified by [d0, d1, d2])
  450 +
  451 + void permute(T* dest, T* src, size_t sx, size_t sy, size_t sz, size_t d0, size_t d1, size_t d2){
  452 + size_t d[3] = {d0, d1, d2};
  453 + size_t s[3] = {sx, sy, sz};
  454 + size_t p[3];// = {x, y, z};
  455 +
  456 + if(d[0] == 0 && d[1] == 1 && d[2] == 2){
  457 + //this isn't actually a permute - just copy the data
  458 + memcpy(dest, src, sizeof(T) * sx * sy * sz);
  459 + }
  460 + else if(d[0] == 0){ //the individual lines are contiguous, so you can memcpy line-by-line
  461 + size_t y, z;
  462 + size_t src_idx, dest_idx;
  463 + size_t x_bytes = sizeof(T) * sx;
  464 + for(z = 0; z < sz; z++){
  465 + p[2] = z;
  466 + for(y = 0; y < sy; y++){
  467 + p[1] = y;
  468 + src_idx = z * sx * sy + y * sx;
  469 + dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]];
  470 + //std::cout<<z<<", "<<y<<" ------- "<<p[d[2]]<<" * "<<s[d[0]]<<" * "<<s[d[1]]<<" + "<<p[d[1]]<<" * "<<s[d[0]]<<std::endl;
  471 + memcpy(dest + dest_idx, src + src_idx, x_bytes);
  472 + }
  473 + }
  474 + }
  475 + else{ //loop through every damn point
  476 + size_t x, y, z;
  477 + size_t src_idx, dest_idx;
  478 + size_t src_z, src_y;
  479 + for(z = 0; z < sz; z++){
  480 + p[2] = z;
  481 + src_z = z * sx * sy;
  482 + for(y = 0; y < sy; y++){
  483 + p[1] = y;
  484 + src_y = src_z + y * sx;
  485 + for(x = 0; x < sx; x++){
  486 + p[0] = x;
  487 + src_idx = src_y + x;
  488 + dest_idx = p[d[2]] * s[d[0]] * s[d[1]] + p[d[1]] * s[d[0]] + p[d[0]];
  489 + dest[dest_idx] = src[src_idx];
  490 + }
  491 + }
  492 + }
  493 + }
  494 + }
  495 +
378 496 };
379 497  
380 498 }
... ...
stim/envi/bip.h
... ... @@ -373,7 +373,7 @@ public:
373 373 for(size_t xy = 0; xy < XY; xy++){ //for each pixel
374 374 memset(spec, 0, Bb); //set the spectrum to zero
375 375 if(mask == NULL || mask[xy]){ //if the pixel is masked
376   - len = 0; //initialize the
  376 + len = 0; //initialize the
377 377 file.read((char*)spec, Bb); //read a spectrum
378 378 for(size_t b = 0; b < B; b++) //for each band
379 379 len += spec[b]*spec[b]; //add the square of the spectral band
... ... @@ -385,7 +385,7 @@ public:
385 385 file.seekg(Bb, std::ios::cur); //otherwise skip a spectrum
386 386 target.write((char*)spec, Bb); //output the normalized spectrum
387 387 if(PROGRESS) progress = (double)(xy + 1) / (double)XY * 100; //update the progress
388   - }
  388 + }
389 389 }
390 390  
391 391  
... ... @@ -1088,6 +1088,232 @@ public:
1088 1088 return true;
1089 1089 }
1090 1090  
  1091 +
  1092 +#ifdef CUDA_FOUND
  1093 + /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS
  1094 + /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra
  1095 + bool coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
  1096 +
  1097 + cudaError_t cudaStat;
  1098 + cublasStatus_t stat;
  1099 + cublasHandle_t handle;
  1100 +
  1101 + progress = 0; //initialize the progress to zero (0)
  1102 + unsigned long long XY = X() * Y(); //calculate the number of elements in a band image
  1103 + unsigned long long B = Z(); //calculate the number of spectral elements
  1104 +
  1105 + double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file
  1106 + double* s_dev; //declare a device pointer that will store the spectrum on the GPU
  1107 +
  1108 + double* s2_dev; // device pointer on the GPU
  1109 + cudaStat = cudaMalloc(&s2_dev, B * sizeof(double)); // allocate space on the CUDA device
  1110 + cudaStat = cudaMemset(s2_dev, 0, B * sizeof(double)); // initialize s2_dev to zero (0)
  1111 +
  1112 + double* A_dev; //declare a device pointer that will store the covariance matrix on the GPU
  1113 + double* avg_dev; //declare a device pointer that will store the average spectrum
  1114 + cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum
  1115 + cudaStat = cudaMalloc(&A_dev, B * B * sizeof(double)); //allocate space on the CUDA device for the covariance matrix
  1116 + cudaStat = cudaMemset(A_dev, 0, B * B * sizeof(double)); //initialize the covariance matrix to zero (0)
  1117 + cudaStat = cudaMalloc(&avg_dev, B * sizeof(double)); //allocate space on the CUDA device for the average spectrum
  1118 + stat = cublasSetVector((int)B, sizeof(double), avg, 1, avg_dev, 1); //copy the average spectrum to the CUDA device
  1119 +
  1120 + double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product)
  1121 + double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction)
  1122 +
  1123 + stat = cublasCreate(&handle); //create a cuBLAS instance
  1124 + if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid
  1125 + printf ("CUBLAS initialization failed\n");
  1126 + return EXIT_FAILURE;
  1127 + }
  1128 + for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel
  1129 + if (mask == NULL || mask[xy] != 0){
  1130 + pixeld(s, xy); //retreive the spectrum at the current xy pixel location
  1131 +
  1132 + stat = cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1); //copy the spectrum from the host to the device
  1133 + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, avg_dev, 1, s_dev, 1); //subtract the average spectrum
  1134 +
  1135 + cudaMemcpy(s2_dev, s_dev + 1 , (B-1) * sizeof(double), cudaMemcpyDeviceToDevice); //copy B-1 elements from shifted source data (s_dev) to device pointer (s2_dev )
  1136 + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, s2_dev, 1, s_dev, 1); //Minimum/Maximum Autocorrelation Factors (MAF) method : subtranct each pixel from adjacent pixel (z direction is choosed to do so , which is almost the same as x or y direction or even average of them )
  1137 +
  1138 +
  1139 + stat = cublasDsyr(handle, CUBLAS_FILL_MODE_UPPER, (int)B, &ger_alpha, s_dev, 1, A_dev, (int)B); //calculate the covariance matrix (symmetric outer product)
  1140 + }
  1141 + if(PROGRESS) progress = (double)(xy+1) / XY * 100; //record the current progress
  1142 +
  1143 + }
  1144 +
  1145 + cublasGetMatrix((int)B, (int)B, sizeof(double), A_dev, (int)B, coN, (int)B); //copy the result from the GPU to the CPU
  1146 +
  1147 + cudaFree(A_dev); //clean up allocated device memory
  1148 + cudaFree(s_dev);
  1149 + cudaFree(s2_dev);
  1150 + cudaFree(avg_dev);
  1151 +
  1152 + for(unsigned long long i = 0; i < B; i++){ //copy the upper triangular portion to the lower triangular portion
  1153 + for(unsigned long long j = i+1; j < B; j++){
  1154 + coN[B * i + j] = coN[B * j + i];
  1155 + }
  1156 + }
  1157 +
  1158 + return true;
  1159 + }
  1160 +#endif
  1161 +
  1162 + /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision.
  1163 +
  1164 + /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
  1165 + /// @param avg is a pointer to memory of size B that stores the average spectrum
  1166 + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
  1167 + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
  1168 +
  1169 +#ifdef CUDA_FOUND
  1170 + int dev_count;
  1171 + cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1172 + cudaDeviceProp prop;
  1173 + cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1174 + if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
  1175 + return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1176 +#endif
  1177 +
  1178 +
  1179 +
  1180 + progress = 0;
  1181 + //memory allocation
  1182 + unsigned long long XY = X() * Y();
  1183 + unsigned long long B = Z();
  1184 + T* temp = (T*)malloc(sizeof(T) * B);
  1185 +
  1186 + unsigned long long count = nnz(mask); //count the number of masked pixels
  1187 +
  1188 + //initialize covariance matrix of noise
  1189 + memset(coN, 0, B * B * sizeof(double));
  1190 +
  1191 + //calculate covariance matrix
  1192 + double* coN_half = (double*) malloc(B * B * sizeof(double)); //allocate space for a higher-precision intermediate matrix
  1193 + double* temp_precise = (double*) malloc(B * sizeof(double));
  1194 + memset(coN_half, 0, B * B * sizeof(double)); //initialize the high-precision matrix with zeros
  1195 + unsigned long long idx; //stores i*B to speed indexing
  1196 + for (unsigned long long xy = 0; xy < XY; xy++){
  1197 + if (mask == NULL || mask[xy] != 0){
  1198 + pixel(temp, xy); //retreive the spectrum at the current xy pixel location
  1199 + for(unsigned long long b = 0; b < B; b++) //subtract the mean from this spectrum and increase the precision
  1200 + temp_precise[b] = (double)temp[b] - (double)avg[b];
  1201 +
  1202 + for(unsigned long long b2 = 0; b2 < B-1; b2++) //Minimum/Maximum Autocorrelation Factors (MAF) method : subtranct each pixel from adjacent pixel (z direction is choosed to do so , which is almost the same as x or y direction or even average of them )
  1203 + temp_precise[b2] -= temp_precise[b2+1];
  1204 +
  1205 + idx = 0;
  1206 + for (unsigned long long b0 = 0; b0 < B; b0++){ //for each band
  1207 + for (unsigned long long b1 = b0; b1 < B; b1++)
  1208 + coN_half[idx++] += temp_precise[b0] * temp_precise[b1];
  1209 + }
  1210 + }
  1211 + if(PROGRESS) progress = (double)(xy+1) / XY * 100;
  1212 + }
  1213 + idx = 0;
  1214 + for (unsigned long long i = 0; i < B; i++){ //copy the precision matrix to both halves of the output matrix
  1215 + for (unsigned long long j = i; j < B; j++){
  1216 + coN[j * B + i] = coN[i * B + j] = coN_half[idx++] / (double) count;
  1217 + }
  1218 + }
  1219 +
  1220 + free(temp);
  1221 + free(temp_precise);
  1222 + return true;
  1223 + }
  1224 +
  1225 + #ifdef CUDA_FOUND
  1226 + /// Project the spectra onto a set of basis functions
  1227 + /// @param outfile is the name of the new binary output file that will be created
  1228 + /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering)
  1229 + /// @param basis a set of basis vectors that the data set will be projected onto (after centering)
  1230 + /// @param M is the number of basis vectors
  1231 + /// @param mask is a character mask used to limit processing to valid pixels
  1232 + bool project_cublas(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){
  1233 +
  1234 + cudaError_t cudaStat;
  1235 + cublasStatus_t stat;
  1236 + cublasHandle_t handle;
  1237 +
  1238 + std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file
  1239 +
  1240 + progress = 0; //initialize the progress to zero (0)
  1241 + unsigned long long XY = X() * Y(); //calculate the number of elements in a band image
  1242 + unsigned long long B = Z(); //calculate the number of spectral elements
  1243 +
  1244 + double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file
  1245 + double* s_dev; //declare a device pointer that will store the spectrum on the GPU
  1246 + cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum
  1247 +
  1248 +
  1249 + double* basis_dev; // device pointer on the GPU
  1250 + cudaStat = cudaMalloc(&basis_dev, M * B * sizeof(double)); // allocate space on the CUDA device
  1251 + cudaStat = cudaMemset(basis_dev, 0, M * B * sizeof(double)); // initialize basis_dev to zero (0)
  1252 +
  1253 +
  1254 + /// transposing basis matrix (because cuBLAS is column-major)
  1255 + double *basis_Transposed = (double*)malloc(M * B * sizeof(double));
  1256 + memset(basis_Transposed, 0, M * B * sizeof(double));
  1257 + for (int i = 0; i<M; i++)
  1258 + for (int j = 0; j<B; j++)
  1259 + basis_Transposed[i+j*M] = basis[i*B+j];
  1260 +
  1261 + stat = cublasSetMatrix((int)M, (int)B, sizeof(double),basis_Transposed, (int)M, basis_dev, (int)M); //copy the basis_Transposed matrix to the CUDA device (both matrices are stored in column-major format)
  1262 +
  1263 + double* center_dev; //declare a device pointer that will store the center (average)
  1264 + cudaStat = cudaMalloc(&center_dev, B * sizeof(double)); //allocate space on the CUDA device for the center (average)
  1265 + stat = cublasSetVector((int)B, sizeof(double), center, 1, center_dev, 1); //copy the center vector (average) to the CUDA device (from host to device)
  1266 +
  1267 +
  1268 + double* A = (double*)malloc(sizeof(double) * M); //allocate space for the projected pixel on the host
  1269 + double* A_dev; //declare a device pointer that will store the projected pixel on the GPU
  1270 + cudaStat = cudaMalloc(&A_dev,M * sizeof(double)); //allocate space on the CUDA device for the projected pixel
  1271 + cudaStat = cudaMemset(A_dev, 0,M * sizeof(double)); //initialize the projected pixel to zero (0)
  1272 +
  1273 + double axpy_alpha = -1; //multiplication factor for the center (in order to perform a subtraction)
  1274 + double axpy_alpha2 = 1; //multiplication factor for the matrix-vector multiplication
  1275 + double axpy_beta = 0; //multiplication factor for the matrix-vector multiplication (there is no second scalor)
  1276 +
  1277 + stat = cublasCreate(&handle); //create a cuBLAS instance
  1278 + if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid
  1279 + printf ("CUBLAS initialization failed\n");
  1280 + return EXIT_FAILURE;
  1281 + }
  1282 +
  1283 + T* temp = (T*)malloc(sizeof(T) * M); //allocate space for the projected pixel to be written on the disc
  1284 + size_t i;
  1285 + for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel
  1286 + if (mask == NULL || mask[xy] != 0){
  1287 + pixeld(s, xy); //retreive the spectrum at the current xy pixel location
  1288 +
  1289 + stat = cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1); //copy the spectrum from the host to the device
  1290 + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, center_dev, 1, s_dev, 1); //subtract the center (average)
  1291 + stat = cublasDgemv(handle,CUBLAS_OP_N,(int)M,(int)B,&axpy_alpha2,basis_dev,(int)M,s_dev,1,&axpy_beta,A_dev,1); //performs the matrix-vector multiplication
  1292 + stat = cublasGetVector((int)B, sizeof(double), A_dev, 1, A, 1); //copy the projected pixel to the host (from GPU to CPU)
  1293 +
  1294 + //std::copy<double*, T*>(A, A + M, temp);
  1295 + for(i = 0; i < M; i++) temp[i] = (T)A[i]; //casting projected pixel from double to whatever T is
  1296 + }
  1297 +
  1298 + target.write(reinterpret_cast<const char*>(temp), sizeof(T) * M); //write the projected vector
  1299 + if(PROGRESS) progress = (double)(xy+1) / XY * 100; //record the current progress
  1300 +
  1301 + }
  1302 +
  1303 + //clean up allocated device memory
  1304 + cudaFree(A_dev);
  1305 + cudaFree(s_dev);
  1306 + cudaFree(basis_dev);
  1307 + cudaFree(center_dev);
  1308 + free(A);
  1309 + free(s);
  1310 + free(temp);
  1311 + target.close(); //close the output file
  1312 +
  1313 + return true;
  1314 + }
  1315 +#endif
  1316 +
1091 1317 /// Project the spectra onto a set of basis functions
1092 1318 /// @param outfile is the name of the new binary output file that will be created
1093 1319 /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering)
... ... @@ -1096,6 +1322,14 @@ public:
1096 1322 /// @param mask is a character mask used to limit processing to valid pixels
1097 1323 bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){
1098 1324  
  1325 +#ifdef CUDA_FOUND
  1326 + int dev_count;
  1327 + cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1328 + cudaDeviceProp prop;
  1329 + cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1330 + if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
  1331 + return project_cublas(outfile,center,basis,M,mask,PROGRESS); //use cuBLAS to calculate the covariance matrix
  1332 +#endif
1099 1333 std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file
1100 1334 //std::string headername = outfile + ".hdr"; //the header file name
1101 1335  
... ... @@ -1125,7 +1359,7 @@ public:
1125 1359 free(s); //free temporary storage arrays
1126 1360 free(rs);
1127 1361 target.close(); //close the output file
1128   -
  1362 +
1129 1363 return true;
1130 1364 }
1131 1365  
... ... @@ -1395,6 +1629,52 @@ public:
1395 1629 }
1396 1630 }
1397 1631  
  1632 + bool multiply(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1633 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1634 + std::string headername = outname + ".hdr"; //the header file name
  1635 +
  1636 + unsigned long long N = X() * Y(); //calculate the total number of pixels to be processed
  1637 + unsigned long long B = Z(); //get the number of bands
  1638 + T* s = (T*)malloc(sizeof(T) * B); //allocate memory to store a pixel
  1639 + for(unsigned long long n = 0; n < N; n++){ //for each pixel in the image
  1640 + if(mask == NULL || mask[n]){ //if the pixel is masked
  1641 + for(size_t b = 0; b < B; b++) //for each band in the spectrum
  1642 + s[b] *= (T)v; //multiply
  1643 + }
  1644 +
  1645 + if(PROGRESS) progress = (double)(n+1) / N * 100; //set the current progress
  1646 +
  1647 + target.write((char*)s, sizeof(T) * B); //write the corrected data into destination
  1648 + } //end for each pixel
  1649 +
  1650 + free(s); //free the spectrum
  1651 + target.close(); //close the output file
  1652 + return true;
  1653 + }
  1654 +
  1655 + bool add(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1656 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1657 + std::string headername = outname + ".hdr"; //the header file name
  1658 +
  1659 + unsigned long long N = X() * Y(); //calculate the total number of pixels to be processed
  1660 + unsigned long long B = Z(); //get the number of bands
  1661 + T* s = (T*)malloc(sizeof(T) * B); //allocate memory to store a pixel
  1662 + for(unsigned long long n = 0; n < N; n++){ //for each pixel in the image
  1663 + if(mask == NULL || mask[n]){ //if the pixel is masked
  1664 + for(size_t b = 0; b < B; b++) //for each band in the spectrum
  1665 + s[b] += (T)v; //multiply
  1666 + }
  1667 +
  1668 + if(PROGRESS) progress = (double)(n+1) / N * 100; //set the current progress
  1669 +
  1670 + target.write((char*)s, sizeof(T) * B); //write the corrected data into destination
  1671 + } //end for each pixel
  1672 +
  1673 + free(s); //free the spectrum
  1674 + target.close(); //close the output file
  1675 + return true;
  1676 + }
  1677 +
1398 1678  
1399 1679  
1400 1680 /// Close the file.
... ...
stim/envi/bsq.h
... ... @@ -9,6 +9,7 @@
9 9 #include <vector>
10 10 #include <deque>
11 11 #include <chrono>
  12 +#include <future>
12 13  
13 14  
14 15  
... ... @@ -376,36 +377,144 @@ public:
376 377  
377 378 }
378 379  
379   - /// Convert the current BSQ file to a BIL file with the specified file name.
380   -
381   - /// @param outname is the name of the output BIL file to be saved to disk.
382   - bool bil(std::string outname, bool PROGRESS = false)
383   - {
384   - //simplify image resolution
385   - unsigned long long jump = (Y() - 1) * X() * sizeof(T);
  /// Read a block of the file into a pre-allocated buffer by forwarding to hsi<T>::read.
  /// The request passed on is: first offset 0 with extent X(), second offset `start` with extent `n`,
  /// third offset 0 with extent Z() (presumably n full lines starting at line `start` -- confirm against hsi<T>::read).
  /// @param dest is the destination buffer (bil()/bip() size it as n * X() * Z() elements)
  /// @param start is the index of the first line requested
  /// @param n is the number of lines requested
  380 + void readlines(T* dest, size_t start, size_t n){
  381 + hsi<T>::read(dest, 0, start, 0, X(), n, Z());
  382 + }
386 383  
387   - std::ofstream target(outname.c_str(), std::ios::binary);
388   - std::string headername = outname + ".hdr";
  384 + /// Convert this BSQ file to a BIL
  385 + bool bil(std::string outname, bool PROGRESS = false){
389 386  
390   - unsigned long long L = X();
391   - T* line = (T*)malloc(sizeof(T) * L);
  387 + const size_t buffers = 4; //number of buffers required for this algorithm
  388 + size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
392 389  
393   - for ( unsigned long long y = 0; y < Y(); y++) //for each y position
394   - {
395   - file.seekg(y * X() * sizeof(T), std::ios::beg); //seek to the beginning of the xz slice
396   - for ( unsigned long long z = 0; z < Z(); z++ ) //for each band
397   - {
398   - file.read((char *)line, sizeof(T) * X()); //read a line
399   - target.write((char*)line, sizeof(T) * X()); //write the line to the output file
400   - file.seekg(jump, std::ios::cur); //seek to the next band
401   - if(PROGRESS) progress = (double)((y+1) * Z() + z + 1) / (Z() * Y()) * 100; //update the progress counter
  390 + size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
  391 + size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  392 + if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
  393 + std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
  394 + exit(1);
  395 + }
  396 + size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
  397 +
  398 + T* src[2]; //source double-buffer for asynchronous batching
  399 + src[0] = (T*) malloc(max_batch_bytes);
  400 + src[1] = (T*) malloc(max_batch_bytes);
  401 + T* dst[2]; //destination double-buffer for asynchronous batching
  402 + dst[0] = (T*) malloc(max_batch_bytes);
  403 + dst[1] = (T*) malloc(max_batch_bytes);
  404 +
  405 + size_t N[2]; //number of slices stored in buffers 0 and 1
  406 + N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
  407 +
  408 + std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
  409 + //initialize with buffer 0 (used for double buffering)
  410 + size_t y_load = 0;
  411 + size_t y_proc = 0;
  412 + std::future<void> rthread;
  413 + std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
  414 +
  415 + readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  416 + y_load += N[0]; //increment the loaded slice counter
  417 + int b = 1;
  418 +
  419 + std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers
  420 + std::chrono::high_resolution_clock::time_point t_end;
  421 + size_t t_batch; //number of milliseconds to process a batch
  422 + size_t t_total = 0;
  423 + while(y_proc < Y()){ //while there are still slices to be processed
  424 + t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
  425 + if(y_load < Y()){ //if there are still slices to be loaded, load them
  426 + if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
  427 + rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
  428 +
  429 + y_load += N[b]; //increment the number of loaded slices
402 430 }
  431 +
  432 + b = !b; //swap the double-buffer
  433 +
  434 + binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 0, 2, 1); //permute the batch to a BIL file
  435 + target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
  436 + y_proc += N[b]; //increment the counter of processed pixels
  437 + if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  438 + t_end = std::chrono::high_resolution_clock::now();
  439 + t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
  440 + t_total += t_batch;
  441 + rthread.wait();
403 442 }
404 443  
405   - free(line);
406   - target.close();
  444 + std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl;
  445 + free(src[0]); //free buffer resources
  446 + free(src[1]);
  447 + free(dst[0]);
  448 + free(dst[1]);
  449 + return true; //return true
  450 + }
407 451  
408   - return true;
  452 + /// Convert this BSQ file to a BIP
  453 + bool bip(std::string outname, bool PROGRESS = false){
  454 +
  455 + const size_t buffers = 4; //number of buffers required for this algorithm
  456 + size_t mem_per_batch = binary<T>::buffer_size / buffers; //calculate the maximum memory available for a batch
  457 +
  458 + size_t slice_bytes = X() * Z() * sizeof(T); //number of bytes in an input batch slice (Y-slice in this case)
  459 + size_t max_slices_per_batch = mem_per_batch / slice_bytes; //maximum number of slices we can process in one batch given memory constraints
  460 + if(max_slices_per_batch == 0){ //if there is insufficient memory for a single slice, throw an error
  461 + std::cout<<"error, insufficient memory for stim::bsq::bil()"<<std::endl;
  462 + exit(1);
  463 + }
  464 + size_t max_batch_bytes = max_slices_per_batch * slice_bytes; //calculate the amount of memory that will be allocated for all four buffers
  465 +
  466 + T* src[2]; //source double-buffer for asynchronous batching
  467 + src[0] = (T*) malloc(max_batch_bytes);
  468 + src[1] = (T*) malloc(max_batch_bytes);
  469 + T* dst[2]; //destination double-buffer for asynchronous batching
  470 + dst[0] = (T*) malloc(max_batch_bytes);
  471 + dst[1] = (T*) malloc(max_batch_bytes);
  472 +
  473 + size_t N[2]; //number of slices stored in buffers 0 and 1
  474 + N[0] = N[1] = min(Y(), max_slices_per_batch); //start with the maximum number of slices that can be stored (may be the entire data set)
  475 +
  476 + std::ofstream target(outname.c_str(), std::ios::binary); //open an output file for writing
  477 + //initialize with buffer 0 (used for double buffering)
  478 + size_t y_load = 0;
  479 + size_t y_proc = 0;
  480 + std::future<void> rthread;
  481 + std::future<std::ostream&> wthread; //create asynchronous threads for reading and writing
  482 +
  483 + readlines(src[0], 0, N[0]); //read the first batch into the 0 source buffer
  484 + y_load += N[0]; //increment the loaded slice counter
  485 + int b = 1;
  486 +
  487 + std::chrono::high_resolution_clock::time_point t_start; //high-resolution timers
  488 + std::chrono::high_resolution_clock::time_point t_end;
  489 + size_t t_batch; //number of milliseconds to process a batch
  490 + size_t t_total = 0;
  491 + while(y_proc < Y()){ //while there are still slices to be processed
  492 + t_start = std::chrono::high_resolution_clock::now(); //start the timer for this batch
  493 + if(y_load < Y()){ //if there are still slices to be loaded, load them
  494 + if(y_load + N[b] > Y()) N[b] = Y() - y_load; //if the next batch would process more than the total slices, adjust the batch size
  495 + rthread = std::async(std::launch::async, &stim::bsq<T>::readlines, this, src[b], y_load, N[b]);
  496 +
  497 + y_load += N[b]; //increment the number of loaded slices
  498 + }
  499 +
  500 + b = !b; //swap the double-buffer
  501 +
  502 + binary<T>::permute(dst[b], src[b], X(), N[b], Z(), 2, 0, 1); //permute the batch to a BIP file
  503 + target.write((char*)dst[b], N[b] * slice_bytes); //write the permuted data to the output file
  504 + y_proc += N[b]; //increment the counter of processed pixels
  505 + if(PROGRESS) progress = (double)( y_proc + 1 ) / Y() * 100; //increment the progress counter based on the number of processed pixels
  506 + t_end = std::chrono::high_resolution_clock::now();
  507 + t_batch = std::chrono::duration_cast<std::chrono::milliseconds>(t_end-t_start).count();
  508 + t_total += t_batch;
  509 + rthread.wait();
  510 + }
  511 +
  512 + std::cout<<"Total time to execute: "<<t_total<<" ms"<<std::endl;
  513 + free(src[0]); //free buffer resources
  514 + free(src[1]);
  515 + free(dst[0]);
  516 + free(dst[1]);
  517 + return true; //return true
409 518 }
410 519  
411 520 /// Return a baseline corrected band given two adjacent baseline points and their bands. The result is stored in a pre-allocated array.
... ... @@ -1238,6 +1347,60 @@ public:
1238 1347 if(PROGRESS) progress = (double)(b+1) / (double)B * 100;
1239 1348 }
1240 1349  
  1350 + } //end deriv
  1351 +
  1352 + bool multiply(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1353 + unsigned long long B = Z(); //calculate the number of bands
  1354 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1355 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1356 +
  1357 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1358 + std::string headername = outname + ".hdr"; //the header file name
  1359 +
  1360 + T * c; //pointer to the current image
  1361 + c = (T*)malloc( S ); //allocate memory for the band image
  1362 +
  1363 + for(unsigned long long j = 0; j < B; j++){ //for each band
  1364 + band_index(c, j); //load the current band
  1365 + for(unsigned long long i = 0; i < XY; i++){ //for each pixel
  1366 + if(mask == NULL || mask[i]) //if the pixel is masked
  1367 + c[i] *= (T)v; //perform the multiplication
  1368 + }
  1369 + target.write(reinterpret_cast<const char*>(c), S); //write normalized data into destination
  1370 +
  1371 + if(PROGRESS) progress = (double)(j+1) / B * 100; //update the progress
  1372 + }
  1373 +
  1374 + free(c); //free the band
  1375 + target.close(); //close the output file
  1376 + return true;
  1377 + }
  1378 +
  1379 + bool add(std::string outname, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1380 + unsigned long long B = Z(); //calculate the number of bands
  1381 + unsigned long long XY = X() * Y(); //calculate the number of pixels in a band
  1382 + unsigned long long S = XY * sizeof(T); //calculate the number of bytes in a band
  1383 +
  1384 + std::ofstream target(outname.c_str(), std::ios::binary); //open the target binary file
  1385 + std::string headername = outname + ".hdr"; //the header file name
  1386 +
  1387 + T * c; //pointer to the current image
  1388 + c = (T*)malloc( S ); //allocate memory for the band image
  1389 +
  1390 + for(unsigned long long j = 0; j < B; j++){ //for each band
  1391 + band_index(c, j); //load the current band
  1392 + for(unsigned long long i = 0; i < XY; i++){ //for each pixel
  1393 + if(mask == NULL || mask[i]) //if the pixel is masked
  1394 + c[i] += (T)v; //perform the multiplication
  1395 + }
  1396 + target.write(reinterpret_cast<const char*>(c), S); //write normalized data into destination
  1397 +
  1398 + if(PROGRESS) progress = (double)(j+1) / B * 100; //update the progress
  1399 + }
  1400 +
  1401 + free(c); //free the band
  1402 + target.close(); //close the output file
  1403 + return true;
1241 1404 }
1242 1405  
1243 1406  
... ...
stim/envi/envi.h
... ... @@ -7,6 +7,7 @@
7 7 #include "../envi/bil.h"
8 8 #include "../math/fd_coefficients.h"
9 9 #include <iostream>
  10 +#include <fstream>
10 11 //#include "../image/image.h"
11 12  
12 13 namespace stim{
... ... @@ -58,15 +59,17 @@ class envi{
58 59 for(size_t i = 0; i < len; i++)
59 60 cast(&dst[i], &src[i]);
60 61 }
61   -
  62 +
62 63 public:
  64 + envi_header header;
63 65  
  66 +
64 67 /// Default constructor
65 68 envi(){
66 69 file = NULL; //set the file pointer to NULL
67 70 }
68 71  
69   - envi_header header;
  72 +
70 73  
71 74 void* malloc_spectrum(){
72 75 return alloc_array(header.bands);
... ... @@ -76,6 +79,40 @@ public:
76 79 return alloc_array(header.samples * header.lines);
77 80 }
78 81  
  82 + void set_buffer(double memfrac = 0.5){
  83 + if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
  84 + if(header.data_type ==envi_header::float32)
  85 + ((bsq<float>*)file)->set_buffer(memfrac);
  86 + else if(header.data_type == envi_header::float64)
  87 + ((bsq<double>*)file)->set_buffer(memfrac);
  88 + else
  89 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  90 + }
  91 +
  92 + else if(header.interleave == envi_header::BIL){ //if the infile is bil file
  93 + if(header.data_type ==envi_header::float32)
  94 + ((bil<float>*)file)->set_buffer(memfrac);
  95 + else if(header.data_type == envi_header::float64)
  96 + ((bil<double>*)file)->set_buffer(memfrac);
  97 + else
  98 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  99 + }
  100 +
  101 + else if(header.interleave == envi_header::BIP){ //if the infile is bip file
  102 + if(header.data_type ==envi_header::float32)
  103 + ((bip<float>*)file)->set_buffer(memfrac);
  104 + else if(header.data_type == envi_header::float64)
  105 + ((bip<double>*)file)->set_buffer(memfrac);
  106 + else
  107 + std::cout<<"ERROR: unidentified data type"<<std::endl;
  108 + }
  109 +
  110 + else{
  111 + std::cout<<"ERROR: unidentified file type"<<std::endl;
  112 + exit(1);
  113 + }
  114 + }
  115 +
79 116 /// Returns the size of the data type in bytes
80 117 unsigned int type_size(){
81 118 if(header.data_type == envi_header::float32) return 4;
... ... @@ -224,6 +261,37 @@ public:
224 261  
225 262 }
226 263  
  264 + /// Open an Agilent binary file as an ENVI stream
  265 + bool open_agilent(std::string filename){
  266 + fname = filename; //store the file name
  267 +
  268 + //Open the file temporarily to get the header information
  269 + FILE* f = fopen(filename.c_str(), "r"); //open the binary file for reading
  270 + if(f == NULL) return false; //return false if no file is opened
  271 +
  272 + fseek(f, 9, SEEK_SET); //seek to the number of bands
  273 + short b; //allocate space for the number of bands
  274 + fread(&b, sizeof(short), 1, f); //read the number of bands
  275 + fseek(f, 13, SEEK_CUR); //skip the the x and y dimensions
  276 + short x, y;
  277 + fread(&x, sizeof(short), 1, f); //read the image x and y size
  278 + fread(&y, sizeof(short), 1, f);
  279 + fclose(f); //close the file
  280 +
  281 + //store the information from the Agilent header in the ENVI header
  282 + header.bands = b;
  283 + header.samples = x;
  284 + header.lines = y;
  285 + header.data_type = envi_header::float32; //all values are 32-bit floats
  286 + header.header_offset = 1020; //number of bytes in an Agilent binary header
  287 + header.interleave = envi_header::BSQ; //all Agilent binary files are BSQ
  288 +
  289 + allocate(); //allocate the streaming file object
  290 + open(); //open the file for streaming
  291 +
  292 + return true;
  293 + }
  294 +
227 295 /// Open an existing ENVI file given the filename and a header structure
228 296  
229 297 /// @param filename is the name of the ENVI binary file
... ... @@ -257,7 +325,6 @@ public:
257 325 //header.load(headername);
258 326  
259 327 return open(filename, h);
260   -
261 328 }
262 329  
263 330 /// Normalize a hyperspectral ENVI file given a band number and threshold.
... ... @@ -454,9 +521,9 @@ public:
454 521 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
455 522 ((bsq<float>*)file)->bil(outfile, PROGRESS);
456 523 else if(interleave == envi_header::BIP){ //ERROR
457   - std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
458   - //return ((bsq<float>*)file)->bip(outfile, PROGRESS);
459   - exit(1);
  524 + //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
  525 + ((bsq<float>*)file)->bip(outfile, PROGRESS);
  526 + //exit(1);
460 527 }
461 528 }
462 529  
... ... @@ -468,9 +535,9 @@ public:
468 535 else if(interleave == envi_header::BIL) //convert BSQ -> BIL
469 536 ((bsq<double>*)file)->bil(outfile, PROGRESS);
470 537 else if(interleave == envi_header::BIP){ //ERROR
471   - std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
472   - //return ((bsq<float>*)file)->bip(outfile, PROGRESS);
473   - exit(1);
  538 + //std::cout<<"ERROR: conversion from BSQ to BIP isn't practical; use BSQ->BIL->BIP instead"<<std::endl;
  539 + ((bsq<float>*)file)->bip(outfile, PROGRESS);
  540 + //exit(1);
474 541 }
475 542 }
476 543  
... ... @@ -1106,46 +1173,6 @@ public:
1106 1173 return false;
1107 1174 }
1108 1175  
1109   - /// Retrieve a spectrum from the specified location
1110   -
1111   - /// @param ptr is a pointer to pre-allocated memory of size B*sizeof(T)
1112   - /// @param x is the x-coordinate of the spectrum
1113   - /// @param y is the y-coordinate of the spectrum
1114   - /*bool spectrum(void* ptr, unsigned long long x, unsigned long long y, bool PROGRESS = false){
1115   -
1116   - if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
1117   - if(header.data_type ==envi_header::float32)
1118   - return ((bsq<float>*)file)->spectrum((float*)ptr, x, y, PROGRESS);
1119   - else if (header.data_type == envi_header::float64)
1120   - return ((bsq<double>*)file)->spectrum((double*)ptr, x, y, PROGRESS);
1121   - else{
1122   - std::cout << "ERROR: unidentified data type" << std::endl;
1123   - exit(1);
1124   - }
1125   - }
1126   - else if (header.interleave == envi_header::BIL){
1127   - if (header.data_type == envi_header::float32)
1128   - return ((bil<float>*)file)->spectrum((float*)ptr, x, y, PROGRESS);
1129   - else if (header.data_type == envi_header::float64)
1130   - return ((bil<double>*)file)->spectrum((double*)ptr, x, y, PROGRESS);
1131   - else{
1132   - std::cout << "ERROR: unidentified data type" << std::endl;
1133   - exit(1);
1134   - }
1135   - }
1136   - else if (header.interleave == envi_header::BIP){
1137   - if (header.data_type == envi_header::float32)
1138   - return ((bip<float>*)file)->spectrum((float*)ptr, x, y, PROGRESS);
1139   - else if (header.data_type == envi_header::float64)
1140   - return ((bip<double>*)file)->spectrum((double*)ptr, x, y, PROGRESS);
1141   - else{
1142   - std::cout << "ERROR: unidentified data type" << std::endl;
1143   - exit(1);
1144   - }
1145   - }
1146   - return false;
1147   - }*/
1148   -
1149 1176 // Retrieve a spectrum at the specified 1D location
1150 1177  
1151 1178 /// @param ptr is a pointer to pre-allocated memory of size B*sizeof(T)
... ... @@ -1209,50 +1236,6 @@ public:
1209 1236 void spectrum(T* ptr, size_t x, size_t y, bool PROGRESS = false){
1210 1237  
1211 1238 spectrum<T>(ptr, y * header.samples + x, PROGRESS);
1212   - /*void* temp = alloc_array<T>(header.bands); //allocate space for the output array
1213   -
1214   - if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
1215   - if(header.data_type ==envi_header::float32){
1216   - ((bsq<float>*)file)->spectrum((float*)temp, x, y, PROGRESS);
1217   - cast<T, float>(ptr, temp, header.bands);
1218   - }
1219   - else if (header.data_type == envi_header::float64){
1220   - ((bsq<double>*)file)->spectrum((double*)temp, x, y, PROGRESS);
1221   - cast<T, double>(ptr, temp, header.bands);
1222   - }
1223   - else{
1224   - std::cout << "ERROR: unidentified data type" << std::endl;
1225   - exit(1);
1226   - }
1227   - }
1228   - else if (header.interleave == envi_header::BIL){
1229   - if (header.data_type == envi_header::float32){
1230   - ((bil<float>*)file)->spectrum((float*)temp, x, y, PROGRESS);
1231   - cast<T, float>(ptr, temp, header.bands);
1232   - }
1233   - else if (header.data_type == envi_header::float64){
1234   - ((bil<double>*)file)->spectrum((double*)temp, x, y, PROGRESS);
1235   - cast<T, double>(ptr, temp, header.bands);
1236   - }
1237   - else{
1238   - std::cout << "ERROR: unidentified data type" << std::endl;
1239   - exit(1);
1240   - }
1241   - }
1242   - else if (header.interleave == envi_header::BIP){
1243   - if (header.data_type == envi_header::float32){
1244   - ((bip<float>*)file)->spectrum((float*)temp, x, y, PROGRESS);
1245   - cast<T, float>(ptr, temp, header.bands);
1246   - }
1247   - else if (header.data_type == envi_header::float64){
1248   - ((bip<double>*)file)->spectrum((double*)temp, x, y, PROGRESS);
1249   - cast<T, double>(ptr, temp, header.bands);
1250   - }
1251   - else{
1252   - std::cout << "ERROR: unidentified data type" << std::endl;
1253   - exit(1);
1254   - }
1255   - }*/
1256 1239 }
1257 1240  
1258 1241 /// Retrieve a single band (based on index) and stores it in pre-allocated memory.
... ... @@ -1340,14 +1323,6 @@ public:
1340 1323 if (header.interleave == envi_header::BSQ){
1341 1324 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
1342 1325 exit(1);
1343   - /*if (header.data_type == envi_header::float32)
1344   - return ((bsq<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
1345   - else if (header.data_type == envi_header::float64)
1346   - return ((bsq<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
1347   - else{
1348   - std::cout << "ERROR: unidentified data type" << std::endl;
1349   - exit(1);
1350   - }*/
1351 1326 }
1352 1327 else if (header.interleave == envi_header::BIL){
1353 1328 if (header.data_type == envi_header::float32)
... ... @@ -1372,6 +1347,35 @@ public:
1372 1347 return false;
1373 1348 }
1374 1349  
  1350 + /// Calculate the covariance of noise matrix for all masked pixels in the image.
  1351 +
  1352 + /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
  1353 + /// @param avg is a pointer to memory of size B that stores the average spectrum
  1354 + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
  1355 + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){
  1356 + if (header.interleave == envi_header::BSQ){
  1357 + std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
  1358 + exit(1);
  1359 + }
  1360 +
  1361 +
  1362 + else if (header.interleave == envi_header::BIL){
  1363 + std::cout<<"ERROR: calculating the covariance matrix of noise for a BIL file is impractical; convert to BIP first"<<std::endl;
  1364 + exit(1);
  1365 + }
  1366 +
  1367 + else if (header.interleave == envi_header::BIP){
  1368 + if (header.data_type == envi_header::float32)
  1369 + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1370 + else if (header.data_type == envi_header::float64)
  1371 + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1372 + else{
  1373 + std::cout << "ERROR: unidentified data type" << std::endl;
  1374 + exit(1);
  1375 + }
  1376 + }
  1377 + return false;
  1378 + }
1375 1379  
1376 1380 /// Crop a region of the image and save it to a new file.
1377 1381  
... ... @@ -1635,7 +1639,81 @@ public:
1635 1639 }
1636 1640 exit(1);
1637 1641 }
1638   -};
  1642 +
  1643 + void multiply(std::string outfile, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1644 + header.save(outfile + ".hdr");
  1645 + if (header.interleave == envi_header::BSQ){
  1646 + if (header.data_type == envi_header::float32)
  1647 + ((bsq<float>*)file)->multiply(outfile, v, mask, PROGRESS);
  1648 + else if (header.data_type == envi_header::float64)
  1649 + ((bsq<double>*)file)->multiply(outfile, v, mask, PROGRESS);
  1650 + else{
  1651 + std::cout << "ERROR: unidentified data type" << std::endl;
  1652 + exit(1);
  1653 + }
  1654 + }
  1655 +
  1656 + else if (header.interleave == envi_header::BIL){
  1657 + if (header.data_type == envi_header::float32)
  1658 + ((bil<float>*)file)->multiply(outfile, v, mask, PROGRESS);
  1659 + else if (header.data_type == envi_header::float64)
  1660 + ((bil<double>*)file)->multiply(outfile, v, mask, PROGRESS);
  1661 + else{
  1662 + std::cout << "ERROR: unidentified data type" << std::endl;
  1663 + exit(1);
  1664 + }
  1665 + }
  1666 +
  1667 + else if (header.interleave == envi_header::BIP){
  1668 + if (header.data_type == envi_header::float32)
  1669 + ((bip<float>*)file)->multiply(outfile, v, mask, PROGRESS);
  1670 + else if (header.data_type == envi_header::float64)
  1671 + ((bip<double>*)file)->multiply(outfile, v, mask, PROGRESS);
  1672 + else{
  1673 + std::cout << "ERROR: unidentified data type" << std::endl;
  1674 + exit(1);
  1675 + }
  1676 + }
  1677 + exit(1);
  1678 + }
  1679 +
  1680 + void add(std::string outfile, double v, unsigned char* mask = NULL, bool PROGRESS = false){
  1681 + header.save(outfile + ".hdr");
  1682 + if (header.interleave == envi_header::BSQ){
  1683 + if (header.data_type == envi_header::float32)
  1684 + ((bsq<float>*)file)->add(outfile, v, mask, PROGRESS);
  1685 + else if (header.data_type == envi_header::float64)
  1686 + ((bsq<double>*)file)->add(outfile, v, mask, PROGRESS);
  1687 + else{
  1688 + std::cout << "ERROR: unidentified data type" << std::endl;
  1689 + exit(1);
  1690 + }
  1691 + }
  1692 +
  1693 + else if (header.interleave == envi_header::BIL){
  1694 + if (header.data_type == envi_header::float32)
  1695 + ((bil<float>*)file)->add(outfile, v, mask, PROGRESS);
  1696 + else if (header.data_type == envi_header::float64)
  1697 + ((bil<double>*)file)->add(outfile, v, mask, PROGRESS);
  1698 + else{
  1699 + std::cout << "ERROR: unidentified data type" << std::endl;
  1700 + exit(1);
  1701 + }
  1702 + }
  1703 +
  1704 + else if (header.interleave == envi_header::BIP){
  1705 + if (header.data_type == envi_header::float32)
  1706 + ((bip<float>*)file)->add(outfile, v, mask, PROGRESS);
  1707 + else if (header.data_type == envi_header::float64)
  1708 + ((bip<double>*)file)->add(outfile, v, mask, PROGRESS);
  1709 + else{
  1710 + std::cout << "ERROR: unidentified data type" << std::endl;
  1711 + exit(1);
  1712 + }
  1713 + }
  1714 + exit(1);
  1715 + }
  1716 +}; //end ENVI
1639 1717  
1640 1718 } //end namespace rts
1641 1719  
... ...
stim/envi/envi_header.h
... ... @@ -440,9 +440,24 @@ struct envi_header
440 440 }
441 441  
442 442 /// Convert a wavelength to a band index (or a pair of surrounding band indices)
  443 + /// if the file doesn't specify wavelengths, w is assumed to be a band index
443 444 std::vector<size_t> band_index(double w){
444 445 std::vector<size_t> idx; //create an empty array of indices
445   - if(w < wavelength[0] || w > wavelength[bands-1]) return idx; //if the wavelength range is outside of the file, return an empty array
  446 + if(wavelength.size() == 0){ //if a wavelength vector doesn't exist, assume the passed value is a band
  447 + if(w < 0 || w > bands-1) return idx; //if the band is outside the given band range, return an empty vector
  448 + size_t low, high; //allocate space for the floor and ceiling
  449 + low = (size_t)std::floor(w); //calculate the floor
  450 + high = (size_t)std::ceil(w); //calculate the ceiling
  451 + if(low == high) //if the floor and ceiling are the same
  452 + idx.push_back(low); //return a vector with one element (the given w matches a band exactly)
  453 + else{
  454 + idx.resize(2); //otherwise return the floor and ceiling
  455 + idx[0] = low;
  456 + idx[1] = high;
  457 + }
  458 + return idx;
  459 + }
  460 + else if(w < wavelength[0] || w > wavelength[bands-1]) return idx; //if the wavelength range is outside of the file, return an empty array
446 461  
447 462 for(size_t b = 0; b < bands; b++){ //for each band in the wavelength vector
448 463 if(wavelength[b] == w){ //if an exact match is found
... ...
stim/envi/hsi.h
... ... @@ -149,13 +149,13 @@ public:
149 149 for(size_t i = 0; i < R[0] * R[1]; i++){ //for each pixel in that page
150 150  
151 151 #ifdef _WIN32
152   - if(!_finite(page[i])){ //if the value at index i is finite
  152 + if(!_finite(page[i])){ //if the value at index i is not finite
153 153 #else
154   - if(!std::isfinite(page[i])){ //C++11 implementation
  154 + if(!std::isfinite(page[i])){ //C++11 implementation
155 155 #endif
156 156 size_t x, y, b;
157   - xyb(p * R[0] * R[1] + i, x, y, b); //find the 3D coordinates of the value
158   - mask[ y * X() + x ] = 0; //mask the pixel (it's not bad)
  157 + xyb(p * R[0] * R[1] + i, x, y, b); //find the 3D coordinates of the value
  158 + mask[ y * X() + x ] = 0; //remove the pixel (it's bad)
159 159 }
160 160 }
161 161 if(PROGRESS) progress = (double)(p + 1) / (double)R[2] * 100;
... ... @@ -202,6 +202,24 @@ public:
202 202 }
203 203 }
204 204  
  205 + void read(T* dest, size_t x, size_t y, size_t z, size_t sx, size_t sy, size_t sz){ //reads a block into dest: the position (x, y, z) and size (sx, sy, sz) are permuted through O and passed to binary<T>::read()
  206 + size_t d[3]; //position in the binary coordinate system
  207 + size_t sd[3]; //size in the binary coordinate system
  208 +
  209 + d[O[0]] = x; //set the point in the binary coordinate system
  210 + d[O[1]] = y; //O[i] gives the slot each argument occupies in the binary ordering (presumably the interleave order - confirm)
  211 + d[O[2]] = z;
  212 +
  213 + sd[O[0]] = sx; //set the size in the binary coordinate system
  214 + sd[O[1]] = sy;
  215 + sd[O[2]] = sz;
  216 +
  217 + if(!binary<T>::read(dest, d[0], d[1], d[2], sd[0], sd[1], sd[2])){ //forward the permuted request; a false return is treated as fatal
  218 + std::cout<<"error reading block in stim::hsi: ("<<d[0]<<", "<<d[1]<<", "<<d[2]<<") - ["<<sd[0]<<", "<<sd[1]<<", "<<sd[2]<<"]"<<std::endl;
  219 + exit(1);
  220 + }
  221 + }
  222 +
205 223 };
206 224  
207 225 } //end namespace STIM
... ...
stim/gl/gl_texture.h
1 1 #ifndef STIM_GL_TEXTURE_H
2 2 #define STIM_GL_TEXTURE_H
3 3  
4   -
5   -
6   -
7   -/*
8   -includes not necessary (yet)
9   -
10   -#include <iterator>
11   -#include <algorithm>
12   -
13   -
14   -*/
15   -
16 4 #include <math.h>
17 5 #include <iostream>
18 6 #include <vector>
19 7 #include "../grids/image_stack.h"
20   -#include <GL/glut.h>
21   -//#include <GL/glext.h>
22   -#include "./error.h"
  8 +//Visual Studio requires GLEW
  9 +#ifdef _WIN32
  10 + #include <GL/glew.h>
  11 +#endif
  12 +//#include <GL/glut.h>
  13 +#include <stim/gl/error.h>
23 14 namespace stim{
24 15  
25 16 /*
... ... @@ -27,195 +18,282 @@ class gl_texture
27 18 Uses image_stack class in order to create a texture object.
28 19 */
29 20  
30   -template<typename T>
31   -class gl_texture : public virtual image_stack<T>
  21 +template<typename T, typename F = float>
  22 +class gl_texture : public virtual image_stack<T, F>
32 23 {
33   - private:
34   - /// Sets the internal texture_type, based on the data
35   - /// size. Either 3D, 2D, 1D textures.
36   -
37   - void
38   - setTextureType()
39   - {
40   - if (R[3] > 1)
41   - texture_type = GL_TEXTURE_3D;
42   - else if (R[3] == 1 && R[2] == 0)
43   - texture_type = GL_TEXTURE_1D;
44   - else if (R[3] == 1)
45   - texture_type = GL_TEXTURE_2D;
46   - }
47 24 protected:
48   - std::string path;
  25 + //std::string path;
49 26 GLuint texID; //OpenGL object
50 27 GLenum texture_type; //1D, 2D, 3D
51   - GLint interpType;
52   - GLint texWrap;
53   - GLenum type;
54   - GLenum format;
  28 + GLint interpolation;
  29 + GLint wrap;
  30 + GLenum cpu_type;
  31 + GLenum gpu_type;
  32 + GLenum format; //format for the texture (GL_RGBA, GL_LUMINANCE, etc.)
55 33 using image_stack<T>::R;
56   - using image_stack<T>::S;
  34 + //using image_stack<T>::S;
57 35 using image_stack<T>::ptr;
58   - using image_stack<T>::samples;
  36 +
  37 + /// Sets the internal texture_type, based on the data dimensions
  38 + void setTextureType(){
  39 + if (R[3] > 1) //if the third dimension is greater than 1
  40 + texture_type = GL_TEXTURE_3D; //this is a 3D texture
  41 + else if (R[2] > 1) //if the second dimension is greater than 1
  42 + texture_type = GL_TEXTURE_2D; //this is a 2D texture
  43 + else if (R[1] > 1) //if the dimension value is greater than 1
  44 + texture_type = GL_TEXTURE_1D; //this is a 1D texture
  45 + }
  46 +
  47 + //resets texID to 0, the value attach() tests to decide whether the texture still has to be generated
  48 + void init() {
  49 + texID = 0; //initialize texture ID to zero, default if OpenGL returns an error
  50 + //memset(R, 0, sizeof(size_t));
  51 + //memset(grid<T, 4, F>::S, 0, sizeof(F));
  52 + }
  53 +
  54 + //guesses the color format of the texture
  55 + GLenum guess_format(){
  56 + size_t channels = R[0];
  57 + switch(channels){
  58 + case 1:
  59 + return GL_LUMINANCE;
  60 + case 2:
  61 + return GL_RG;
  62 + case 3:
  63 + return GL_RGB;
  64 + case 4:
  65 + return GL_RGBA;
  66 + default:
  67 + std::cout<<"Error in stim::gl_texture - unable to guess texture format based on number of channels ("<<R[4]<<")"<<std::endl;
  68 + exit(1);
  69 + }
  70 + }
  71 +
  72 + //guesses the OpenGL CPU data type based on T; exits if T is none of the types tested below
  73 + GLenum guess_cpu_type(){
  74 + // The following is C++ 11 code, but causes problems on some compilers (ex. nvcc). Below is my best approximation to a solution
  75 +
  76 + //if(std::is_same<T, unsigned char>::value) return CV_MAKETYPE(CV_8U, (int)C());
  77 + //if(std::is_same<T, char>::value) return CV_MAKETYPE(CV_8S, (int)C());
  78 + //if(std::is_same<T, unsigned short>::value) return CV_MAKETYPE(CV_16U, (int)C());
  79 + //if(std::is_same<T, short>::value) return CV_MAKETYPE(CV_16S, (int)C());
  80 + //if(std::is_same<T, int>::value) return CV_MAKETYPE(CV_32S, (int)C());
  81 + //if(std::is_same<T, float>::value) return CV_MAKETYPE(CV_32F, (int)C());
  82 + //if(std::is_same<T, double>::value) return CV_MAKETYPE(CV_64F, (int)C());
  83 +
  84 + if(typeid(T) == typeid(unsigned char)) return GL_UNSIGNED_BYTE; //map T to the matching OpenGL pixel data type by comparing type information
  85 + if(typeid(T) == typeid(char)) return GL_BYTE;
  86 + if(typeid(T) == typeid(unsigned short)) return GL_UNSIGNED_SHORT;
  87 + if(typeid(T) == typeid(short)) return GL_SHORT;
  88 + if(typeid(T) == typeid(unsigned int)) return GL_UNSIGNED_INT;
  89 + if(typeid(T) == typeid(int)) return GL_INT;
  90 + if(typeid(T) == typeid(float)) return GL_FLOAT;
  91 +
  92 + std::cout<<"ERROR in stim::gl_texture - no valid data type found"<<std::endl; //any other T (double included - it has no branch above) is fatal
  93 + exit(1);
  94 + }
  95 +
  96 + //Guesses the "internal format" of the texture to closely approximate the original format
  97 + GLint guess_gpu_type(){
  98 + switch(format){ //outer switch: pixel format stored in format; inner switches: CPU data type stored in cpu_type
  99 + case GL_LUMINANCE:
  100 + switch(cpu_type){
  101 + case GL_BYTE: //signed and unsigned data of the same width share one internal format
  102 + case GL_UNSIGNED_BYTE:
  103 + return GL_LUMINANCE8;
  104 + case GL_SHORT:
  105 + case GL_UNSIGNED_SHORT:
  106 + return GL_LUMINANCE16;
  107 + case GL_INT:
  108 + case GL_UNSIGNED_INT:
  109 + return GL_LUMINANCE32I_EXT;
  110 + case GL_FLOAT:
  111 + return GL_LUMINANCE32F_ARB;
  112 + default:
  113 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  114 + exit(1);
  115 + }
  116 + case GL_RGB:
  117 + switch(cpu_type){
  118 + case GL_BYTE:
  119 + case GL_UNSIGNED_BYTE:
  120 + return GL_RGB8;
  121 + case GL_SHORT:
  122 + case GL_UNSIGNED_SHORT:
  123 + return GL_RGB16;
  124 + case GL_INT:
  125 + case GL_UNSIGNED_INT:
  126 + return GL_RGB32I;
  127 + case GL_FLOAT:
  128 + return GL_RGB32F;
  129 + default:
  130 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  131 + exit(1);
  132 + }
  133 + case GL_RGBA:
  134 + switch(cpu_type){
  135 + case GL_BYTE:
  136 + case GL_UNSIGNED_BYTE:
  137 + return GL_RGBA8;
  138 + case GL_SHORT:
  139 + case GL_UNSIGNED_SHORT:
  140 + return GL_RGBA16;
  141 + case GL_INT:
  142 + case GL_UNSIGNED_INT:
  143 + return GL_RGBA32I;
  144 + case GL_FLOAT:
  145 + return GL_RGBA32F;
  146 + default:
  147 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  148 + exit(1);
  149 + }
  150 + default: //NOTE(review): guess_format() can return GL_RG for 2-channel data, which has no case here and ends up in this fatal default
  151 + std::cout<<"error in stim::gl_texture - unable to guess GPU internal format"<<std::endl;
  152 + exit(1);
  153 + }
  154 + }
  155 + /// creates this texture in the current OpenGL context and uploads the data in ptr using the stored format, cpu_type and gpu_type
  156 + void generate_texture(){
  157 + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); //source rows are read with 1-byte alignment
  158 + CHECK_OPENGL_ERROR
  159 + glGenTextures(1, &texID); //reserve one texture name and store it in texID
  160 + CHECK_OPENGL_ERROR
  161 + glBindTexture(texture_type, texID); //bind it so the calls below configure this texture
  162 + CHECK_OPENGL_ERROR
  163 + glTexParameteri(texture_type, GL_TEXTURE_MIN_FILTER, interpolation); //the same filter is used for minification...
  164 + CHECK_OPENGL_ERROR
  165 + glTexParameteri(texture_type, GL_TEXTURE_MAG_FILTER, interpolation); //...and magnification
  166 + CHECK_OPENGL_ERROR
  167 + switch(texture_type){ //set the wrap mode on each used axis and upload with the call matching the dimensionality
  168 + case GL_TEXTURE_3D:
  169 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, wrap);
  170 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_T, wrap);
  171 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_R, wrap);
  172 + glTexImage3D(texture_type, 0, gpu_type, (GLsizei)R[1], (GLsizei)R[2], (GLsizei)R[3], 0, format, cpu_type, ptr); //width R[1], height R[2], depth R[3]
  173 + break;
  174 + case GL_TEXTURE_2D:
  175 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, wrap);
  176 + CHECK_OPENGL_ERROR
  177 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_T, wrap);
  178 + CHECK_OPENGL_ERROR
  179 + glTexImage2D(texture_type, 0, gpu_type, (GLsizei)R[1], (GLsizei)R[2], 0, format, cpu_type, ptr); //width R[1], height R[2]
  180 + CHECK_OPENGL_ERROR
  181 + break;
  182 + case GL_TEXTURE_1D:
  183 + glTexParameteri(texture_type, GL_TEXTURE_WRAP_S, wrap);
  184 + CHECK_OPENGL_ERROR
  185 + glTexImage1D(texture_type, 0, gpu_type, (GLsizei)R[1], 0, format, cpu_type, ptr); //width R[1]
  186 + CHECK_OPENGL_ERROR
  187 + break;
  188 + default: //texture_type is none of the three targets handled above
  189 + std::cout<<"Error in stim::gl_texture - unrecognized texture target when generating texture"<<std::endl;
  190 + exit(1);
  191 + break;
  192 + }
  193 + CHECK_OPENGL_ERROR
  194 + }
  195 + void guess_parameters(){ //derives every texture parameter from the loaded data; guess_gpu_type() reads format and cpu_type, so it has to run last
  196 + setTextureType(); //set the texture type: 1D, 2D, 3D
  197 + format = guess_format(); //guess the texture format based on the number of image channels
  198 + cpu_type = guess_cpu_type(); //guess the CPU type based on the template
  199 + gpu_type = guess_gpu_type(); //guess the GPU type based on the format and template
  200 + }
59 201  
60 202 public:
61 203  
62 204 ///default constructor
63   - gl_texture()
64   - {
65   -
  205 + gl_texture( GLint interp = GL_LINEAR, //default to linear interpolation
  206 + GLint twrap = GL_REPEAT) //default repeating the texture at the edges
  207 + : image_stack<T>() {
  208 + init(); //initialize the texture with NULL values
  209 + interpolation = interp; //store the interpolation type
  210 + wrap = twrap; //store the wrap type
66 211 }
67 212  
68   - ///@param string path to the directory with the image files.
69   - ///Creates an instance of the gl_texture object with a path to the data.
  213 + ///@param file_mask is a mask indicating the files to load
  214 + ///Creates an instance of the gl_texture object and initializes it with the files matching the mask
  215 +
  216 + gl_texture(std::string file_mask, GLint interp = GL_LINEAR, GLint twrap = GL_REPEAT){
  217 + init(); //reset texID so attach() knows no texture has been generated yet
  218 + interpolation = interp; //store interpolation type
  219 + wrap = twrap; //store wrap type
  220 + load_images(file_mask); //load the images and guess the texture parameters from them
  221 + }
70 222  
71   - gl_texture(std::string file_mask)
72   - {
73   - //path = file_mask;
74   - image_stack<T>::load_images(file_mask);
75   - setTextureType();
  223 + ///Creates an instance of gl_texture and initializes with a file list
  224 + ///@param file_list is a list of files
  225 + ///@param interp is the type of texture interpolation (GL_LINEAR, GL_NEAREST)
  226 + ///@param twrap is the type of texture wrapping
  227 + gl_texture(std::vector<std::string> file_list, GLint interp = GL_LINEAR, GLint twrap = GL_REPEAT){
  228 + init(); //reset texID so attach() knows no texture has been generated yet
  229 + interpolation = interp; //store interpolation type
  230 + wrap = twrap; //store wrap type
  231 + load_images(file_list); //load the images and guess the texture parameters from them
  232 + }
  233 +
  234 + ///Attaches the texture to the current OpenGL context and makes it ready to render
  235 + void attach(){
  236 + if(texID == 0) generate_texture(); //generate the texture if it doesn't already exist
  237 + else{
  238 + std::cout<<"Texture has already been attached to a context."<<std::endl;
  239 + exit(1);
  240 + }
  241 + }
  242 +
  243 + //binds this texture (texID) to its target so it becomes the current render source; no check is made here that attach() has been called
  244 + void bind(){
  245 + glBindTexture(texture_type, texID); //bind the texture to the appropriate texture target
  246 + CHECK_OPENGL_ERROR
76 247 }
77 248  
78 249 ///returns the dimensions of the data in the x, y, z directions.
79   - vec<int>
80   - getSize()
81   - {
  250 + vec<int> getSize(){ //spatial resolution only: R[0] (the channel count) is skipped
82 251 stim::vec<int> size(R[1], R[2], R[3]); //the size_t values in R are stored into a vec<int>
83 252 return size;
84 253 }
85 254  
86   - ///@param GLint interp --GL_LINEAR, GL_NEAREST...
87   - ///@param GLint twrap --GL_REPEAR, GL_CLAMP_TO_EDGE...
88   - ///@param GLenum dataType --GL_UNSIGNED_BYTE, GL_FLOAT16...
89   - ///@param GLenum dataFormat--GL_LUMINANCE, GL_RGB...
90   - /// Texture paramenters.
91   - void
92   - setTexParam(GLint interp = GL_LINEAR,
93   - GLint twrap = GL_CLAMP_TO_EDGE,
94   - GLenum dataType = GL_UNSIGNED_BYTE,
95   - GLenum dataFormat = GL_LUMINANCE)
96   - {
97   - interpType = interp;
98   - texWrap = twrap;
99   - type = dataType;
100   - format = dataFormat;
  255 + void getSize(size_t& x, size_t& y, size_t& z) {
  256 + x = R[0]; y = R[1]; z = R[2];
101 257 }
102 258  
103 259 ///@param x size of the voxel in x direction
104 260 ///@param y size of the voxel in y direction
105 261 ///@param z size of the voxel in z direction
106 262 /// Sets the dimensions of the voxels.
107   - void
108   - setDims(float x, float y, float z)
109   - {
110   - S[1] = x;
111   - S[2] = y;
112   - S[3] = z;
  263 + void setSpacing(float sx, float sy, float sz){ //stores the sample spacing of the three spatial dimensions; the float arguments are assigned to elements of type F
  264 + grid<T, 4, F>::S[1] = sx; //index 0 is not touched here; the spatial values live in S[1..3]
  265 + grid<T, 4, F>::S[2] = sy;
  266 + grid<T, 4, F>::S[3] = sz;
113 267 }
114 268  
115 269 ///Returns a stim::vec that contains the x, y, z sizes of the voxel.
116   - vec<float>
117   - getDims()
118   - {
119   - vec<float> dims(S[1], S[2], S[3]);
  270 + vec<float> getDims(){ //returns S[1], S[2], S[3] - the values written by setSpacing()
  271 + vec<float> dims(grid<T, 4, F>::S[1], grid<T, 4, F>::S[2], grid<T, 4, F>::S[3]);
120 272 return dims;
121   - }
  273 + }
122 274  
123   - ///@param file_Path location of the directory with the files
124   - /// Sets the path and calls the loader on that path.
125   - void
126   - setPath(std::string file_path)
127   - {
128   - path = file_path;
129   - image_stack<T>::load_images(path.append("/*.jpg"));
130   - setTextureType();
  275 + /// Loads a series of files specified by a list of strings, then guesses the texture parameters from the loaded data
  276 + /// @param file_list is the vector of file names as strings
  277 + void load_images(std::vector<std::string> file_list){
  278 + image_stack<T, F>::load_images(file_list); //load the images
  279 + guess_parameters(); //set texture_type, format, cpu_type and gpu_type
131 280 }
132   -
133   - /// Returns an std::string path associated with an instance of the gl_texture class.
134   - std::string
135   - getPath()
136   - {
137   - return path;
  281 +
  282 + ///@param file_mask specifies the file(s) to be loaded
  283 + /// Sets the path and calls the loader on that path.
  284 + void load_images(std::string file_mask){
  285 + image_stack<T>::load_images(file_mask); //load images
  286 + guess_parameters();
138 287 }
139 288  
140 289 /// Returns the GLuint id of the texture created by/associated with the
141   - /// instance of the gl_texture class.
142   -
143   - GLuint
144   - getTexture()
145   - {
  290 + /// instance of the gl_texture class.
  291 + GLuint getTexture(){ //texID is set to 0 by init() and filled in by generate_texture()
146 292 return texID;
147 293 }
148 294  
149   - /// Creates a texture and from the loaded data and
150   - /// assigns that texture to texID
151   - //TO DO :::: 1D textures
152   - //TO DO:::add methods for handling the cases of T
153   - // and convert them to GL equivalent.
154   - // i.e. an overloaded function that handles paramenter conversion.
155   - void
156   - createTexture()
157   - {
158   - glPixelStorei(GL_UNPACK_ALIGNMENT,1);
159   - glGenTextures(1, &texID);
160   - glBindTexture(texture_type, texID);
161   - glTexParameteri(texture_type,
162   - GL_TEXTURE_MIN_FILTER,
163   - interpType);
164   - glTexParameteri(texture_type,
165   - GL_TEXTURE_MAG_FILTER,
166   - interpType);
167   - switch(texture_type)
168   - {
169   - case GL_TEXTURE_3D:
170   - glTexParameteri(texture_type,
171   - GL_TEXTURE_WRAP_S,texWrap);
172   - // GL_REPEAT);
173   - // GL_CLAMP_TO_EDGE);
174   - glTexParameteri(texture_type,
175   - GL_TEXTURE_WRAP_T,texWrap);
176   - // GL_REPEAT);
177   - // GL_CLAMP_TO_EDGE);
178   - glTexParameteri(texture_type,
179   - GL_TEXTURE_WRAP_R,texWrap);
180   - // GL_REPEAT);
181   - // GL_CLAMP_TO_EDGE);
182   - glTexImage3D(texture_type,
183   - 0,
184   - // GL_RGB16,
185   - 1,
186   - R[1],
187   - R[2],
188   - R[3],
189   - 0,
190   - format,
191   - type,
192   - ptr);
193   - //GL_UNSIGNED_BYTE can be TYPES, convert to GL equivalents
194   - glPixelStorei(GL_PACK_ALIGNMENT,1);
195   - break;
196   - case GL_TEXTURE_2D:
197   - glTexParameteri(texture_type,
198   - GL_TEXTURE_WRAP_S, texWrap);
199   - glTexParameteri(texture_type,
200   - GL_TEXTURE_WRAP_T, texWrap);
201   - glTexImage2D(texture_type,
202   - 0,
203   - 1,
204   - R[1],
205   - R[2],
206   - 0,
207   - format,
208   - type,
209   - ptr);
210   - break;
211   - }
212   - }
213   - ///Temporary methods for debugging and testing are below.
214   - ///Self-explanatory.
215 295  
216   - T*
217   - getData()
218   - {
  296 + T* getData(){ //raw pointer to the image data (the same buffer generate_texture() uploads); no copy is made
219 297 return ptr;
220 298 }
221 299  
... ...
stim/grids/grid.h
... ... @@ -15,78 +15,119 @@ namespace stim{
15 15 Functions are provided for saving and loading binary data.
16 16  
17 17 **/
18   -template<typename T, unsigned int D = 1>
  18 +template<typename T, unsigned int D = 1, typename F = float>
19 19 class grid{
20 20  
21 21 protected:
22 22  
23   - stim::vec<unsigned long> R; //elements in each dimension
24   - stim::vec<float> S;
  23 + size_t R[D]; //elements in each dimension
  24 + F S[D]; //spacing between element samples
25 25 T* ptr; //pointer to the data (on the GPU or CPU)
26 26  
27   - ///Return the total number of values in the binary file
28   - unsigned long samples(){
29   -
30   - unsigned long s = 1;
31   - for(unsigned int d = 0; d < D; d++)
32   - s *= R[d];
33   -
34   - return s;
35   - }
  27 +
36 28  
37 29 ///Initializes a grid by allocating the necessary memory and setting all values to zero
38   - void init(){
39   -
40   - //calculate the total number of values
41   - unsigned long S = samples();
42   -
43   - //allocate memory to store the grid
44   - ptr = (T*)malloc(sizeof(T) * S);
45   -
46   - //initialize the memory to zero
47   - memset(ptr, 0, sizeof(T) * S);
  30 + void init(){ //resets the grid to an empty state; no memory is allocated here (alloc() does that)
  31 + ptr = NULL; //initialize the data pointer to NULL
  32 + memset(R, 0, sizeof(size_t) * D); //set the resolution to zero
  33 + for(size_t d = 0; d < D; d++) S[d] = (F)1.0; //initialize the spacing to unity
  34 + }
48 35  
  36 + void alloc(){
  37 + if(ptr != NULL) free(ptr); //if memory has already been allocated, free it
  38 + size_t N = samples(); //calculate the total number of values
  39 + ptr = (T*)calloc(sizeof(T), N); //allocate memory to store the grid
49 40 }
50 41  
51 42 public:
52 43  
53 44 ///Default constructor creates an empty grid (no memory is allocated)
54 45 grid(){
55   - ptr = NULL; //set the pointer to NULL so that we know nothing is allocated
  46 + init(); //NULL data pointer, zero resolution, unit spacing
56 47 }
57 48  
58 49 ///Constructor used to specify the grid size as a vector
59 50  
60 51 /// @param _R is a vector describing the grid resolution
61   - grid( stim::vec<unsigned long> _R){
62   -
63   - //set the grid resolution
64   - R = _R;
65   -
  52 + grid( stim::vec<size_t> _R){
  53 + for (size_t d = 0; d < D; d++)
  54 + R[d] = _R[d];
66 55 init();
67 56 }
68 57  
  58 + ///Return the total number of values in the binary file
  59 + size_t samples(){
  60 + size_t s = 1;
  61 + for(size_t d = 0; d < D; d++)
  62 + s *= R[d];
  63 + return s;
  64 + }
  65 +
  66 + ///Return the number of bytes in the binary file
  67 + size_t bytes(){
  68 + return samples() * sizeof(T); //total number of values times the size of one value
  69 + }
  70 +
69 71 void
70   - setDim(stim::vec<float> s)
71   - {
72   - S = s;
  72 + setDim(stim::vec<float> s){ //sets the sample spacing of every dimension from the first D elements of s
  73 + for(size_t d = 0; d < D; d++) //assumes s holds at least D elements - not checked here
  74 + S[d] = s[d];
73 75 }
74 76  
75 77 ///Constructor used to specify the grid size as a set of parameters
76   -
77 78 /// @param X0... is a list of values describing the grid size along each dimension
78   - grid( unsigned long X0, ...){
  79 + /*grid( size_t X0, ...){
  80 + R[0] = X0; //set the grid size of the first dimension
  81 + va_list ap; //get a variable list
  82 + va_start(ap, X0); //start the variable list at the first element
  83 + for(size_t d = 1; d<D; d++) //for each additional element
  84 + R[d] = va_arg(ap, size_t); //read the value from the variable list as a size_t
  85 + va_end(ap);
  86 + init(); //initialize the grid
  87 + }*/
  88 +
  89 + ///Set the spacing between grid sample points
  90 + /// @param X0... is a list of values describing the grid sample spacing
  91 + /*void spacing(F X0, ...) {
  92 + S[0] = X0; //set the grid size of the first dimension
  93 + va_list ap; //get a variable list
  94 + va_start(ap, X0); //start the variable list at the first element
  95 + for (size_t d = 1; d<D; d++) //for each additional element
  96 + S[d] = va_arg(ap, F); //read the value from the variable list as a size_t
  97 + va_end(ap);
  98 + }*/
  99 +
  100 + /// Set the spacing between grid sample points for the specified dimension
  101 + void spacing(size_t d, F sp){
  102 + if(d < D) S[d] = sp;
  103 + else{
  104 + std::cout<<"error in stim::grid::spacing() - insufficient dimensions"<<std::endl;
  105 + exit(1);
  106 + }
  107 + }
79 108  
80   - R[0] = X0;
  109 + /// Return the spacing for a given dimension
  110 + F spacing(size_t d){
  111 + if(d < D) return S[d];
  112 + else{
  113 + std::cout<<"error in stim::grid::spacing() - insufficient dimensions"<<std::endl;
  114 + exit(1);
  115 + }
  116 + }
81 117  
82   - va_list ap;
83   - va_start(ap, X0);
84   - for(unsigned int d = 1; d<D; d++)
85   - R[d] = va_arg(ap, unsigned long);
86   - va_end(ap);
  118 + /// Get the sample spacing for the given dimension
  119 + F get_spacing(size_t d) {
  120 + return S[d]; //d is not range-checked here, unlike spacing(d)
  121 + }
87 122  
88   - init();
  123 + /// Get the size of the grid along the specified dimension
  124 + F size(size_t d){
  125 + return (F)R[d] * S[d]; //number of samples times the sample spacing; d is not range-checked
  126 + }
89 127  
  128 + /// Return the number of samples along dimension d
  129 + size_t samples(size_t d){
  130 + return R[d]; //d is not range-checked
90 131 }
91 132  
92 133 ///Writes the binary data to disk
... ... @@ -94,13 +135,9 @@ public:
94 135 /// @param filename is the name of the binary file to be written
95 136 void write(std::string filename){
96 137  
97   - std::fstream file;
98   -
99   - //open the file as binary for reading
100   - file.open(filename.c_str(), std::ios::out | std::ios::binary);
101   -
102   - //write file to disk
103   - file.write((char *)ptr, samples() * sizeof(T));
  138 + std::fstream file;
  139 + file.open(filename.c_str(), std::ios::out | std::ios::binary); //open the file as binary for writing
  140 + file.write((char *)ptr, samples() * sizeof(T)); //write file to disk (the whole grid, no header); the stream state is not checked
104 141 }
105 142  
106 143 ///Loads a binary file from disk
... ... @@ -108,66 +145,52 @@ public:
108 145 /// @param filename is the name of the file containing the binary data
109 146 /// @param X is the size of the binary file along each dimension
110 147 /// @param header is the size of the header in bytes
111   - void read(std::string filename, stim::vec<unsigned long> S, unsigned long header = 0){
112   -
113   - R = S; //set the sample resolution
114   -
115   - //allocate space for the data
116   - init();
117   -
118   - std::fstream file;
119   -
120   - //open the file as binary for writing
121   - file.open(filename.c_str(), std::ios::in | std::ios::binary);
122   -
123   - //seek past the header
124   - file.seekg(header, std::ios::beg);
125   -
126   -
127   - //read the data
128   - file.read((char *)ptr, samples() * sizeof(T));
  148 + void read(std::string filename, stim::vec<size_t> X, unsigned long header = 0){
  149 + for(size_t d = 0; d < D; d++)
  150 + R[d] = X[d]; //set the sample resolution
  151 + init(); //allocate space for the data
  152 + std::fstream file;
  153 + file.open(filename.c_str(), std::ios::in | std::ios::binary); //open the file as binary for writing
  154 + file.seekg(header, std::ios::beg); //seek past the header
  155 + file.read((char *)ptr, samples() * sizeof(T)); //read the data
129 156 }
130 157  
131 158 ///Gets a single value from the grid given a set of coordinates
132   -
133 159 /// @param x0... is a list of coordinates specifying the desired value
134   - T get(unsigned long x0, ...){
  160 + /*T get(unsigned long x0, ...){
135 161  
136   - va_list ap;
  162 + va_list ap; //create a variable list
137 163  
138   - unsigned long F = 1;
139   - unsigned long p = x0;
  164 + unsigned long F = 1; //initialize the dimension size to 1
  165 + unsigned long idx = x0;
140 166  
141   - va_start(ap, x0);
142   - for(unsigned int d = 1; d<D; d++){
143   - F *= R[d-1];
144   - p += va_arg(ap, unsigned int) * F;
  167 + va_start(ap, x0); //start a variable list
  168 + for(unsigned int d = 1; d<D; d++){ //for each dimension
  169 + F *= R[d-1]; //get the size of the first dimension
  170 + idx += va_arg(ap, unsigned int) * F; //increment the index
145 171 }
146 172 va_end(ap);
147 173  
148   - return ptr[p];
149   - }
  174 + return ptr[idx]; //access the appropriate element and return the value
  175 + }*/
150 176  
151 177 ///Sets a value in the grid
152 178  
153 179 /// @param value is the grid point value
154 180 /// @x0... is the coordinate of the value to be set
155   - void set(T value, unsigned long x0, ...){
156   -
157   - va_list ap;
158   -
159   - unsigned long F = 1;
160   - unsigned long p = x0;
161   -
162   - va_start(ap, x0);
163   - for(unsigned int d = 1; d<D; d++){
164   - F *= R[d-1];
165   - p += va_arg(ap, unsigned int) * F;
  181 + /*void set(T value, unsigned long x0, ...){
  182 + va_list ap; //create a variable list
  183 + unsigned long F = 1; //initialize the dimension counter to 1
  184 + unsigned long idx = x0; //initialize the index to the first variable
  185 +
  186 + va_start(ap, x0); //start the variable list
  187 + for(unsigned int d = 1; d<D; d++){ //for each dimension
  188 + F *= R[d - 1];
  189 + idx += va_arg(ap, unsigned int) * F; //update the index
166 190 }
167 191 va_end(ap);
168   -
169   - ptr[p] = value;
170   - }
  192 + ptr[idx] = value; //set the value at the indexed location
  193 + }*/
171 194  
172 195  
173 196 ///Outputs grid data as a string
... ... @@ -179,13 +202,11 @@ public:
179 202 for(unsigned int d = 0; d<D; d++){
180 203 if(d!=0) result<<", ";
181 204 result<<R[d];
182   -
183 205 }
184   -
185 206 result<<"]"<<std::endl;
186 207  
187 208 //calculate the number of values to output
188   - unsigned long nV = min((unsigned long long)R[0], (unsigned long long)10);
  209 + unsigned long nV = std::min((unsigned long long)R[0], (unsigned long long)10);
189 210  
190 211 for(unsigned long v = 0; v<nV; v++){
191 212 result<<ptr[v];
... ...
stim/grids/image_stack.h
... ... @@ -8,83 +8,112 @@
8 8  
9 9 namespace stim{
10 10  
11   -/**This class is used to load 3D grid data from stacks of images
12   - The class uses a 4D grid object, where the first dimension is a color value.
13   -**/
14   -template<typename T>
15   -class image_stack : public virtual stim::grid<T, 4>{
  11 +///This class is used to load 3D grid data from stacks of images
  12 +// The class uses a 4D grid object, where the first dimension is a color value.
  13 +template<typename T, typename F = float>
  14 +class image_stack : public virtual stim::grid<T, 4, F>{
16 15  
17 16 enum image_type {stimAuto, stimMono, stimRGB, stimRGBA};
18 17  
19 18 protected:
20   - using stim::grid<T, 4>::S;
  19 + //using stim::grid<T, 4>::S;
21 20 using stim::grid<T, 4>::R;
22 21 using stim::grid<T, 4>::ptr;
23   - using stim::grid<T, 4>::samples;
24 22 using stim::grid<T, 4>::read;
25 23  
26 24 public:
  25 + //default constructor
  26 + image_stack() : grid<T, 4>() {
27 27  
28   - ///Load an image stack based on a file mask. Images are loaded in alphanumeric order.
  28 + }
29 29  
30   - /// @param file_mask is the mask describing images to be loaded
31   - void load_images(std::string file_mask){
  30 + /// Overloads grid::samples() to return the number of samples associated with a given spatial dimension
  31 + /// this is necessary because R[0] stores the color
  32 + size_t samples(size_t d){
  33 + return grid<T, 4, F>::samples(d + 1); //shift by one to skip the color dimension
  34 + }
32 35  
33   - stim::filename file_path(file_mask);
  36 + size_t samples(){ //spatial sample count only; unlike grid::samples(), the channel count R[0] is not multiplied in
  37 + return R[1] * R[2] * R[3]; //return the number of spatial samples
  38 + }
  39 +
  40 + /// Returns the number of color channels
  41 + size_t channels(){
  42 + return R[0]; //the first grid dimension stores the color channels
  43 + }
  44 +
  45 + /// Overloads grid::size() to return the size of the grid associated with a given spatial dimension
  46 + F size(size_t d){
  47 + return grid<T, 4, F>::size(d + 1);
  48 + }
34 49  
35   - //get the list of files
36   - std::vector<stim::filename> file_list = file_path.get_list();
  50 + /// Sets the spacing between samples in the image stack
  51 + void spacing(F sx, F sy, F sz){
  52 + grid<T, 4, F>::S[1] = sx; //set the sample spacing for the appropriate spatial dimension
  53 + grid<T, 4, F>::S[2] = sy;
  54 + grid<T, 4, F>::S[3] = sz;
  55 + }
  56 +
  57 + F spacing(size_t d){
  58 + return grid<T, 4, F>::spacing(d + 1);
  59 + }
  60 +
  61 + /// Overloads the spacing parameter to set the size of the grid associated with a given spatial dimension
  62 + //void spacing(F sx, F sy = 1.0f, F sz = 1.0f){
  63 + // grid<T, 4, F>::spacing((F)1.0, sx, sy, sz);
  64 + //}
  65 +
  66 + /// Load all of the images specified by a list of strings
  67 + /// @param string_list is a list of file names specifying images
  68 + void load_images(std::vector<std::string> string_list){
37 69  
38 70 //if there are no matching files, exit
39   - if(file_list.size() == 0){
  71 + if(string_list.size() == 0){
40 72 std::cout<<"STIM ERROR (image_stack): No matching files for loading a stack."<<std::endl;
41 73 exit(1);
42 74 }
43   - //for(int i = 0; i < file_list.size(); i++)
44   - // std::cout << file_list[i].str() << std::endl;
45 75  
46   - //load the first image and set all of the image_stack properties
47   - stim::image<T> I(file_list[0].str());
  76 + stim::image<T> I(string_list[0]); //load the first image and set all of the image_stack properties
48 77  
49   - //set the image resolution and number of channels
50   - R.push(I.channels());
51   - R.push(I.width());
52   - R.push(I.height());
53   - R.push(file_list.size());
  78 + R[0] = I.channels(); //set the number of color channels
  79 + R[1] = I.width(); //set the stack height and width based on the image size
  80 + R[2] = I.height();
  81 + R[3] = string_list.size(); //set the stack z-resolution based on the number of images
54 82  
55   - //allocate storage space
56   - ptr = (T*)malloc(sizeof(T) * samples());
  83 + ptr = (T*)malloc(grid<T, 4, F>::bytes()); //allocate storage space
57 84  
58 85 //load and copy each image into the grid
59   - for(unsigned int i = 0; i<R[3]; i++){
60   - //load the image
61   - stim::image<T> I(file_list[i].str());
  86 + for(unsigned int i = 0; i<R[3]; i++){ //for each image in the list
  87 + stim::image<T> I(string_list[i]); //load the image
  88 + I.get_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ]); //retrieve the interlaced data from the image - store it in the grid
  89 + }
  90 + }
62 91  
63   - //retrieve the interlaced data from the image - store it in the grid
64   - I.get_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ]);
65   -
  92 + /// Load a stack of images based on a file mask. Images are loaded in alphanumeric order
  93 + /// @param file_mask is the mask describing the images to be loaded
  94 + void load_images(std::string file_mask){
  95 + stim::filename file_path(file_mask); //get the path for the images
  96 + std::vector<stim::filename> file_list = file_path.get_list(); //get the list of files
  97 + std::vector<std::string> string_list(file_list.size()); //allocate space for an array of strings
  98 + for(size_t f = 0; f < file_list.size(); f++){ //for each file name in the list
  99 + string_list[f] = file_list[f].str(); //convert the file name to a string
66 100 }
  101 + load_images(string_list); //load all of the images in the list
67 102 }
68 103  
69 104 ///Inserts image I into slot i.
70 105 /// @param I is the image to insert.
71 106 /// @param i is the slot (page index) where the image is placed.
72   - void insert_image(stim::image<T> I, int i)
73   - {
  107 + void insert_image(stim::image<T> I, int i){
74 108 I.get_interleaved_rgb(&ptr[i *R[0] *R[1] *R[2] ]);
75 109 }
76 110  
77 111 ///Saves a single page to an image file
78 112 /// @param file_name is the name of the image file to be created
79 113 /// @param i is the page to be saved
80   - void save_image(std::string file_name, unsigned int i){
81   -
82   - //create an image
83   - stim::image<T> I;
84   -
85   - //retrieve the interlaced data from the image - store it in the grid
86   - I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]);
87   -
  114 + void save_image(std::string file_name, unsigned int i){
  115 + stim::image<T> I; //create an image
  116 + I.set_interleaved_rgb(&ptr[ i * R[0] * R[1] * R[2] ], R[1], R[2], R[0]); //fill the image from page i of the grid (interleaved data)
88 117 I.save(file_name);
89 118 }
90 119  
... ... @@ -96,10 +125,10 @@ public:
96 125 void
97 126 set_dim(float x, float y, float z)
98 127 {
99   - S[0] = 1;
100   - S[1] = x;
101   - S[2] = y;
102   - S[3] = z;
  128 + grid<T, 4, F>::S[0] = 1;
  129 + grid<T, 4, F>::S[1] = x;
  130 + grid<T, 4, F>::S[2] = y;
  131 + grid<T, 4, F>::S[3] = z;
103 132 }
104 133  
105 134 ///set dimensions of the grid.
... ... @@ -124,12 +153,6 @@ public:
124 153  
125 154 stim::filename file_path(file_mask);
126 155  
127   - //if the file path is relative, update it with the current working directory
128   -// if(file_path.is_relative()){
129   -// stim::filename wd = stim::filename::cwd();
130   -// file_path = wd.get_relative(file_mask);
131   -// }
132   -
133 156 //create a list of file names
134 157 std::vector<std::string> file_list = stim::wildcards::increment(file_path.str(), 0, R[3]-1, 1);
135 158  
... ...
stim/image/image.h
... ... @@ -159,7 +159,10 @@ public:
159 159 std::cout<<"ERROR stim::image::load() - unable to find image "<<filename<<std::endl;
160 160 exit(1);
161 161 }
162   - allocate(cvImage.cols, cvImage.rows, cvImage.channels()); //allocate space for the image
  162 + int cols = cvImage.cols;
  163 + int rows = cvImage.rows;
  164 + int channels = cvImage.channels();
  165 + allocate(cols, rows, channels); //allocate space for the image
163 166 unsigned char* cv_ptr = (unsigned char*)cvImage.data;
164 167 if(C() == 1) //if this is a single-color image, just copy the data
165 168 memcpy(img, cv_ptr, bytes());
... ...
stim/math/vec3.h
... ... @@ -217,6 +217,7 @@ public:
217 217 return result;
218 218 }
219 219  
  220 +//#ifndef __NVCC__
220 221 /// Outputs the vector as a string
221 222 std::string str() const{
222 223 std::stringstream ss;
... ... @@ -234,6 +235,7 @@ public:
234 235  
235 236 return ss.str();
236 237 }
  238 +//#endif
237 239  
238 240 size_t size(){ return 3; }
239 241  
... ...
stim/parser/arguments.h
... ... @@ -523,7 +523,11 @@ namespace stim{
523 523 std::string arg(size_t a){
524 524 return args[a];
525 525 }
526   -
  526 +
  527 + /// Returns an std::vector of argument strings
  528 + std::vector<std::string> arg_vector(){
  529 + return args;
  530 + }
527 531 ///Returns an object describing the argument
528 532  
529 533 /// @param _name is the name of the requested argument
... ...
stim/parser/filename.h
... ... @@ -110,12 +110,17 @@ protected:
110 110 unix_dir = unix_dir.substr(2, unix_dir.length()-2); //extract the directory structure
111 111 }
112 112  
113   - if(unix_dir.at(0) == '/'){ //if there is a leading slash
114   - relative = false; //the path is not relative
115   - unix_dir = unix_dir.substr(1, unix_dir.length() - 1); //remove the slash
  113 + if(drive.size() != 0){
  114 + relative = false;
  115 + }
  116 + if(unix_dir.size() > 0){ //if there is a directory specified, remove surrounding slashes
  117 + if(unix_dir[0] == '/'){ //if there is a leading slash
  118 + relative = false; //the path is not relative
  119 + unix_dir = unix_dir.substr(1, unix_dir.length() - 1); //remove the slash
  120 + }
  121 + if(unix_dir[unix_dir.size()-1] == '/')
  122 + unix_dir = unix_dir.substr(0, unix_dir.length() - 1);
116 123 }
117   - if(unix_dir.at(unix_dir.size()-1) == '/')
118   - unix_dir = unix_dir.substr(0, unix_dir.length() - 1);
119 124  
120 125 path = stim::parser::split(unix_dir, '/'); //split up the directory structure
121 126  
... ...
stim/visualization/camera.h
... ... @@ -186,6 +186,7 @@ public:
186 186 d = vec3<float>(0, 0, 1);
187 187 up = vec3<float>(0, 1, 0);
188 188 focus = 1;
  189 + fov = 60;
189 190  
190 191 }
191 192  
... ...
stim/visualization/colormap.h
... ... @@ -4,13 +4,13 @@
4 4 #include <string>
5 5 #include <stdlib.h>
6 6 #include <cmath>
7   -#include "cublas_v2.h"
8 7  
9 8 #ifdef _WIN32
10 9 #include <float.h>
11 10 #endif
12 11  
13 12 #ifdef __CUDACC__
  13 +#include "cublas_v2.h"
14 14 #include <stim/cuda/cudatools/error.h>
15 15 #endif
16 16  
... ...