Commit 51b6469a3ee77583099edb0a57e1bb7859c28fd1

Authored by dmayerich
1 parent b6179de6

added look-up tables

bessjy.cpp
... ... @@ -13,7 +13,9 @@
13 13 //
14 14 #define _USE_MATH_DEFINES
15 15 #include <math.h>
16   -#include "bessel.h"
  16 +#include "bessel.h"
  17 +
  18 +#define PI 3.14159
17 19  
18 20 double gamma(double x);
19 21 //
... ... @@ -426,7 +428,7 @@ int bessjynb(int n,double x,int &amp;nm,double *jn,double *yn,
426 428 0.2775764465332031,
427 429 -1.993531733751297,
428 430 2.724882731126854e1};
429   -
  431 +
430 432 int i,k,m;
431 433 nm = n;
432 434 if ((x < 0.0) || (n < 0)) return 1;
... ... @@ -702,5 +704,26 @@ int bessjyv(double v,double x,double &amp;vm,double *jv,double *yv,
702 704 }
703 705 vm = n + v0;
704 706 return 0;
  707 +}
  708 +
  709 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  710 + double* cyv, double* cjvp, double* cyvp)
  711 +{
  712 + //first, compute the bessel functions of fractional order
  713 + bessjyv(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  714 +
  715 + //iterate through each and scale
  716 + for(int n = 0; n<=v; n++)
  717 + {
  718 +
  719 + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
  720 + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
  721 +
  722 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0));
  723 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0));
  724 + }
  725 +
  726 + return 0;
  727 +
705 728 }
706   -
  729 +
... ...
cbessjy.cpp
... ... @@ -724,6 +724,7 @@ int cbessjyva_sph(int v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
724 724 //iterate through each and scale
725 725 for(int n = 0; n<=v; n++)
726 726 {
  727 +
727 728 cjv[n] = cjv[n] * sqrt(PI/(z * 2.0));
728 729 cyv[n] = cyv[n] * sqrt(PI/(z * 2.0));
729 730  
... ...
colormap.h deleted
1   -#ifndef RTS_COLORMAP_H
2   -#define RTS_COLORMAP_H
3   -
4   -#include <string>
5   -#include <qimage.h>
6   -#include <qcolor.h>
7   -#include "rts/cuda/error.h"
8   -
9   -
10   -#define BREWER_CTRL_PTS 11
11   -
12   -#ifdef __CUDACC__
13   -texture<float4, cudaTextureType1D> cudaTexBrewer;
14   -static cudaArray* gpuBrewer;
15   -#endif
16   -
17   -
18   -
19   -namespace rts{
20   - namespace colormap{
21   -
22   -enum colormapType {cmBrewer, cmGrayscale};
23   -
24   -static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size)
25   -{
26   - //create an image object
27   - QImage image(x_size, y_size, QImage::Format_RGB32);
28   -
29   - int i;
30   - unsigned char r, g, b;
31   - unsigned int x, y;
32   - for(y=0; y<y_size; y++)
33   - for(x=0; x<x_size; x++)
34   - {
35   - //calculate the 1D index
36   - i = y * x_size + x;
37   -
38   - r = buffer[i * 3 + 0];
39   - g = buffer[i * 3 + 1];
40   - b = buffer[i * 3 + 2];
41   -
42   - //set the image pixel
43   - QColor color(r, g, b);
44   - image.setPixel(x, y, color.rgb());
45   - }
46   -
47   - image.save(filename.c_str());
48   -}
49   -
50   -#ifdef __CUDACC__
51   -static void initBrewer()
52   -{
53   - //initialize the Brewer colormap
54   -
55   - //allocate CPU space
56   - float4 cpuColorMap[BREWER_CTRL_PTS];
57   -
58   - //define control rtsPoints
59   - cpuColorMap[0] = make_float4(0.192157f, 0.211765f, 0.584314f, 1.0f);
60   - cpuColorMap[1] = make_float4(0.270588f, 0.458824f, 0.705882f, 1.0f);
61   - cpuColorMap[2] = make_float4(0.454902f, 0.678431f, 0.819608f, 1.0f);
62   - cpuColorMap[3] = make_float4(0.670588f, 0.85098f, 0.913725f, 1.0f);
63   - cpuColorMap[4] = make_float4(0.878431f, 0.952941f, 0.972549f, 1.0f);
64   - cpuColorMap[5] = make_float4(1.0f, 1.0f, 0.74902f, 1.0f);
65   - cpuColorMap[6] = make_float4(0.996078f, 0.878431f, 0.564706f, 1.0f);
66   - cpuColorMap[7] = make_float4(0.992157f, 0.682353f, 0.380392f, 1.0f);
67   - cpuColorMap[8] = make_float4(0.956863f, 0.427451f, 0.262745f, 1.0f);
68   - cpuColorMap[9] = make_float4(0.843137f, 0.188235f, 0.152941f, 1.0f);
69   - cpuColorMap[10] = make_float4(0.647059f, 0.0f, 0.14902f, 1.0f);
70   -
71   -
72   - int width = BREWER_CTRL_PTS;
73   - int height = 0;
74   -
75   -
76   - // allocate array and copy colormap data
77   - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
78   -
79   - HANDLE_ERROR(cudaMallocArray(&gpuBrewer, &channelDesc, width, height));
80   -
81   - HANDLE_ERROR(cudaMemcpyToArray(gpuBrewer, 0, 0, cpuColorMap, sizeof(float4)*width, cudaMemcpyHostToDevice));
82   -
83   - // set texture parameters
84   - cudaTexBrewer.addressMode[0] = cudaAddressModeClamp;
85   - //texBrewer.addressMode[1] = cudaAddressModeClamp;
86   - cudaTexBrewer.filterMode = cudaFilterModeLinear;
87   - cudaTexBrewer.normalized = true; // access with normalized texture coordinates
88   -
89   - // Bind the array to the texture
90   - HANDLE_ERROR(cudaBindTextureToArray( cudaTexBrewer, gpuBrewer, channelDesc));
91   -
92   -}
93   -
94   -static void destroyBrewer()
95   -{
96   - HANDLE_ERROR(cudaFreeArray(gpuBrewer));
97   -
98   -}
99   -
100   -template<class T>
101   -__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
102   -{
103   - int i = blockIdx.x * blockDim.x + threadIdx.x;
104   - if(i >= N) return;
105   -
106   - //compute the normalized value on [minVal maxVal]
107   - float a = (gpuSource[i] - minVal) / (maxVal - minVal);
108   -
109   - //lookup the color
110   - float shift = 1.0/BREWER_CTRL_PTS;
111   - float4 color = tex1D(cudaTexBrewer, a+shift);
112   -
113   - gpuDest[i * 3 + 0] = 255 * color.x;
114   - gpuDest[i * 3 + 1] = 255 * color.y;
115   - gpuDest[i * 3 + 2] = 255 * color.z;
116   -}
117   -
118   -template<class T>
119   -__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1)
120   -{
121   - int i = blockIdx.x * blockDim.x + threadIdx.x;
122   - if(i >= N) return;
123   -
124   - //compute the normalized value on [minVal maxVal]
125   - float a = (gpuSource[i] - minVal) / (maxVal - minVal);
126   -
127   - gpuDest[i * 3 + 0] = 255 * a;
128   - gpuDest[i * 3 + 1] = 255 * a;
129   - gpuDest[i * 3 + 2] = 255 * a;
130   -}
131   -
132   -template<class T>
133   -static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128)
134   -{
135   - //This function converts a scalar field on the GPU to a color image on the GPU
136   - int gridDim = (nVals + blockDim - 1)/blockDim;
137   - if(cm == cmGrayscale)
138   - applyGrayscale<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
139   - else if(cm == cmBrewer)
140   - {
141   - initBrewer();
142   - applyBrewer<<<gridDim, blockDim>>>(gpuSource, gpuDest, nVals, minVal, maxVal);
143   - destroyBrewer();
144   - }
145   -
146   -}
147   -
148   -template<class T>
149   -static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale)
150   -{
151   - //this function converts a scalar field on the GPU to a color image on the CPU
152   -
153   - //first create the color image on the GPU
154   -
155   - //allocate GPU memory for the color image
156   - unsigned char* gpuDest;
157   - HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 ));
158   -
159   - //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals));
160   -
161   - //create the image on the gpu
162   - gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm);
163   -
164   - //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3));
165   -
166   - //copy the image from the GPU to the CPU
167   - HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost));
168   -
169   - HANDLE_ERROR(cudaFree( gpuDest ));
170   -
171   -}
172   -
173   -template<typename T>
174   -static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)
175   -{
176   - //allocate a color buffer
177   - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
178   -
179   - //do the mapping
180   - gpu2cpu<T>(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);
181   -
182   - //copy the buffer to an image
183   - buffer2image(cpuBuffer, fileDest, x_size, y_size);
184   -
185   - free(cpuBuffer);
186   -}
187   -
188   -#endif
189   -
190   -template<class T>
191   -static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale)
192   -{
193   - int i;
194   - float a;
195   - float range = valMax - valMin;
196   - for(i = 0; i<nVals; i++)
197   - {
198   - //normalize to the range [valMin valMax]
199   - a = (cpuSource[i] - valMin) / range;
200   -
201   - cpuDest[i * 3 + 0] = 255 * a;
202   - cpuDest[i * 3 + 1] = 255 * a;
203   - cpuDest[i * 3 + 2] = 255 * a;
204   - }
205   -
206   -}
207   -
208   -
209   -
210   -template<typename T>
211   -static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale)
212   -{
213   - //allocate a color buffer
214   - unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size);
215   -
216   - //do the mapping
217   - cpu2cpu<T>(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm);
218   -
219   - //copy the buffer to an image
220   - buffer2image(cpuBuffer, fileDest, x_size, y_size);
221   -
222   - free(cpuBuffer);
223   -
224   -}
225   -
226   -}} //end namespace colormap and rts
227   -
228   -#endif
229   -
dataTypes.h
... ... @@ -24,6 +24,8 @@ typedef double ptype;
24 24  
25 25 typedef ptype fieldPoint;
26 26  
  27 +extern bool verbose;
  28 +
27 29 //hybrid GPU/CPU complex data typ
28 30 #include "rts/math/complex.h"
29 31 #include "rts/math/vector.h"
... ...
defaults.h
... ... @@ -15,14 +15,14 @@
15 15 #define DEFAULT_FOCUS_X 0
16 16 #define DEFAULT_FOCUS_Y 0
17 17 #define DEFAULT_FOCUS_Z 0
18   -#define DEFAULT_INCIDENT_ORDER 100
  18 +//#define DEFAULT_INCIDENT_ORDER 20
19 19 #define DEFAULT_STABILITY_PARM 1.4
20 20  
21 21 //optics
22   -#define DEFAULT_CONDENSER_MIN 0.0
  22 +#define DEFAULT_CONDENSER_MIN 0
23 23 #define DEFAULT_CONDENSER_MAX 1
24 24  
25   -#define DEFAULT_OBJECTIVE_MIN 0.0
  25 +#define DEFAULT_OBJECTIVE_MIN 0
26 26 #define DEFAULT_OBJECTIVE_MAX 1
27 27  
28 28 //incident light direction
... ... @@ -36,17 +36,20 @@
36 36 //#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective
37 37  
38 38  
39   -#define DEFAULT_SLICE_MIN_X -5
40   -#define DEFAULT_SLICE_MIN_Y 0
41   -#define DEFAULT_SLICE_MIN_Z -5
  39 +#define DEFAULT_PLANE_MIN_X -5
  40 +#define DEFAULT_PLANE_MIN_Y 0
  41 +#define DEFAULT_PLANE_MIN_Z -5
42 42  
43   -#define DEFAULT_SLICE_MAX_X 5
44   -#define DEFAULT_SLICE_MAX_Y 0
45   -#define DEFAULT_SLICE_MAX_Z 5
  43 +#define DEFAULT_PLANE_MAX_X 5
  44 +#define DEFAULT_PLANE_MAX_Y 0
  45 +#define DEFAULT_PLANE_MAX_Z 5
46 46  
47   -#define DEFAULT_SLICE_NORM_X 0
48   -#define DEFAULT_SLICE_NORM_Y 1
49   -#define DEFAULT_SLICE_NORM_Z 0
  47 +#define DEFAULT_PLANE_NORM_X 0
  48 +#define DEFAULT_PLANE_NORM_Y 1
  49 +#define DEFAULT_PLANE_NORM_Z 0
  50 +
  51 +#define DEFAULT_PLANE_SIZE 40
  52 +#define DEFAULT_PLANE_POSITION 0
50 53  
51 54  
52 55 /*
... ... @@ -64,21 +67,23 @@
64 67 */
65 68  
66 69  
67   -#define DEFAULT_FIELD_ORDER 200
  70 +#define DEFAULT_FIELD_ORDER 10
68 71  
69   -#define DEFAULT_SAMPLES 200
  72 +#define DEFAULT_SAMPLES 400
70 73  
71 74 #define DEFAULT_SLICE_RES 256
72 75  
  76 +#define DEFAULT_SPHERE_THETA_R 1000
  77 +
73 78 #define DEFAULT_PADDING 1
74 79 #define DEFAULT_SUPERSAMPLE 1
75 80  
76   -#define DEFAULT_INTENSITY_FILE "testappend"
  81 +#define DEFAULT_INTENSITY_FILE "out_i.bmp"
77 82 #define DEFAULT_TRANSMITTANCE_FILE ""
78   -#define DEFAULT_ABSORBANCE_FILE "out_a"
  83 +#define DEFAULT_ABSORBANCE_FILE "out_a.bmp"
79 84 #define DEFAULT_NEAR_FILE "out_n.bmp"
80 85 #define DEFAULT_FAR_FILE "out_f.bmp"
81   -#define DEFAULT_EXTENDED_SOURCE "einstein_small.jpg"
  86 +#define DEFAULT_EXTENDED_SOURCE ""
82 87 #define DEFAULT_FIELD_TYPE "magnitude"
83 88 #define DEFAULT_FORMAT fileoutStruct::formatImage
84 89 #define DEFAULT_COLORMAP "brewer"
... ...
fieldslice.cpp
... ... @@ -8,14 +8,16 @@
8 8 using namespace std;
9 9  
10 10 fieldslice::fieldslice(unsigned int x_size, unsigned int y_size)
11   -{
  11 +{
  12 + x_hat = y_hat = z_hat = NULL;
  13 +
12 14 //save the slice resolution
13 15 R[0] = x_size;
14 16 R[1] = x_size;
15 17  
16 18 scalarField = true;
17 19  
18   - //init_gpu();
  20 + init_gpu();
19 21  
20 22  
21 23 }
... ... @@ -101,5 +103,5 @@ fieldslice::fieldslice()
101 103  
102 104 fieldslice::~fieldslice()
103 105 {
104   - //kill_gpu();
  106 + kill_gpu();
105 107 }
... ...
fieldslice.cu
1 1 #include "fieldslice.h"
2 2 #include "dataTypes.h"
3   -#include "rts/cuda/error.h"
  3 +#include "rts/cuda/error.h"
  4 +#include "rts/cuda/threads.h"
4 5  
5 6  
6 7 __global__ void field_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* I, unsigned int N)
7 8 {
8 9 //compute the index for this thread
9   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  10 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  11 + int i = ThreadIndex1D();
  12 +
10 13 if(i >= N) return;
11 14  
12 15 ptype xm = x[i].abs();
... ... @@ -66,7 +69,8 @@ __global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, pty
66 69 __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
67 70 {
68 71 //compute the index for this thread
69   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  72 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  73 + int i = ThreadIndex1D();
70 74 if(i >= N) return;
71 75  
72 76 V[i] = field_component[i].real();
... ... @@ -75,7 +79,8 @@ __global__ void field_real(bsComplex* field_component, ptype* V, unsigned int N)
75 79 __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N)
76 80 {
77 81 //compute the index for this thread
78   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  82 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  83 + int i = ThreadIndex1D();
79 84 if(i >= N) return;
80 85  
81 86 V[i] = field_component[i].imag();
... ... @@ -84,7 +89,8 @@ __global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned i
84 89 __global__ void field_sqrt(ptype* input, ptype* output, unsigned int N)
85 90 {
86 91 //compute the index for this thread
87   - int i = blockIdx.x * blockDim.x + threadIdx.x;
  92 + //int i = blockIdx.x * blockDim.x + threadIdx.x;
  93 + int i = ThreadIndex1D();
88 94 if(i >= N) return;
89 95  
90 96 output[i] = sqrt(input[i]);
... ... @@ -115,7 +121,8 @@ scalarslice fieldslice::Mag()
115 121  
116 122 //compute the total number of values in the slice
117 123 unsigned int N = R[0] * R[1];
118   - int gridDim = (N+BLOCK-1)/BLOCK;
  124 + //int gridDim = (N+BLOCK-1)/BLOCK;
  125 + dim3 gridDim = GenGrid1D(N, BLOCK);
119 126  
120 127 field_intensity<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, result->S, N);
121 128 field_sqrt<<<gridDim, BLOCK>>>(result->S, result->S, N);
... ... @@ -132,7 +139,8 @@ scalarslice fieldslice::Real()
132 139  
133 140 //compute the total number of values in the slice
134 141 unsigned int N = R[0] * R[1];
135   - int gridDim = (N+BLOCK-1)/BLOCK;
  142 + //int gridDim = (N+BLOCK-1)/BLOCK;
  143 + dim3 gridDim = GenGrid1D(N, BLOCK);
136 144  
137 145 field_real<<<gridDim, BLOCK>>>(x_hat, result->S, N);
138 146  
... ... @@ -148,7 +156,8 @@ scalarslice fieldslice::Imag()
148 156  
149 157 //compute the total number of values in the slice
150 158 unsigned int N = R[0] * R[1];
151   - int gridDim = (N+BLOCK-1)/BLOCK;
  159 + //int gridDim = (N+BLOCK-1)/BLOCK;
  160 + dim3 gridDim = GenGrid1D(N, BLOCK);
152 161  
153 162 field_imaginary<<<gridDim, BLOCK>>>(x_hat, result->S, N);
154 163  
... ... @@ -192,7 +201,6 @@ void fieldslice::ScaleField(ptype v)
192 201  
193 202 //compute the total number of values in the slice
194 203 unsigned int N = R[0] * R[1];
195   - //cout<<"Size of mag field: "<<N<<endl;
196 204 int gridDim = (N+BLOCK-1)/BLOCK;
197 205  
198 206 field_scale<<<gridDim, BLOCK>>>(x_hat, y_hat, z_hat, N, v);
... ... @@ -200,19 +208,23 @@ void fieldslice::ScaleField(ptype v)
200 208 }
201 209  
202 210 void fieldslice::init_gpu()
203   -{
  211 +{
  212 + //if the field has no size, return
  213 + if(R[0] == 0 || R[1] == 0)
  214 + return;
  215 +
  216 + //free any previous memory allocations
  217 + if(x_hat)
  218 + HANDLE_ERROR(cudaFree(x_hat));
  219 + if(y_hat)
  220 + HANDLE_ERROR(cudaFree(y_hat));
  221 + if(z_hat)
  222 + HANDLE_ERROR(cudaFree(z_hat));
  223 +
204 224 //allocate space on the GPU for the field slice
205 225 HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex)));
206   - //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex)));
207 226  
208   - //if the field is scalar, y_hat and z_hat are unused
209   - if(scalarField)
210   - {
211   - y_hat = NULL;
212   - z_hat = NULL;
213   -
214   - }
215   - else
  227 + if(!scalarField)
216 228 {
217 229 HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex)));
218 230 //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex)));
... ... @@ -233,6 +245,8 @@ void fieldslice::kill_gpu()
233 245 if(z_hat != NULL)
234 246 HANDLE_ERROR(cudaFree(z_hat));
235 247  
  248 + x_hat = y_hat = z_hat = NULL;
  249 +
236 250 }
237 251  
238 252 void fieldslice::clear_gpu()
... ... @@ -275,7 +289,7 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
275 289 result.scalarField = scalarField;
276 290  
277 291 //allocate space for the new field
278   - result.init_gpu();
  292 + //result.init_gpu();
279 293  
280 294 //create one thread for each pixel of the field slice
281 295 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
... ... @@ -291,3 +305,57 @@ fieldslice fieldslice::crop(int u, int v, int su, int sv)
291 305  
292 306 return result;
293 307 }
  308 +
  309 +fieldslice::fieldslice(const fieldslice& rhs)
  310 +{
  311 + R[0] = rhs.R[0];
  312 + R[1] = rhs.R[1];
  313 + scalarField = rhs.scalarField;
  314 +
  315 + x_hat = y_hat = z_hat = NULL;
  316 +
  317 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  318 + if(rhs.x_hat != NULL)
  319 + {
  320 + HANDLE_ERROR(cudaMalloc( (void**)&x_hat, bytes));
  321 + HANDLE_ERROR(cudaMemcpy( x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  322 + }
  323 + if(rhs.y_hat != NULL)
  324 + {
  325 + HANDLE_ERROR(cudaMalloc( (void**)&y_hat, bytes));
  326 + HANDLE_ERROR(cudaMemcpy( y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  327 + }
  328 + if(rhs.z_hat != NULL)
  329 + {
  330 + HANDLE_ERROR(cudaMalloc( (void**)&z_hat, bytes));
  331 + HANDLE_ERROR(cudaMemcpy( z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  332 + }
  333 +
  334 +}
  335 +
  336 +fieldslice& fieldslice::operator=(const fieldslice& rhs)
  337 +{
  338 + //make sure this isn't a self-allocation
  339 + if(this != &rhs)
  340 + {
  341 + //make a shallow copy
  342 + R[0] = rhs.R[0];
  343 + R[1] = rhs.R[1];
  344 + scalarField = rhs.scalarField;
  345 +
  346 + //initialize to new parameters
  347 + init_gpu();
  348 +
  349 + //make a deep copy
  350 + unsigned int bytes = sizeof(bsComplex) * R[0] * R[1];
  351 + if(x_hat != NULL)
  352 + HANDLE_ERROR(cudaMemcpy(x_hat, rhs.x_hat, bytes, cudaMemcpyDeviceToDevice));
  353 + if(y_hat != NULL)
  354 + HANDLE_ERROR(cudaMemcpy(y_hat, rhs.y_hat, bytes, cudaMemcpyDeviceToDevice));
  355 + if(z_hat != NULL)
  356 + HANDLE_ERROR(cudaMemcpy(z_hat, rhs.z_hat, bytes, cudaMemcpyDeviceToDevice));
  357 + }
  358 +
  359 + return *this;
  360 +
  361 +}
... ...
fieldslice.h
... ... @@ -31,6 +31,9 @@ struct fieldslice
31 31  
32 32 ~fieldslice();
33 33  
  34 + //copy constructor
  35 + fieldslice(const fieldslice& rhs);
  36 +
34 37 //void setPos(bsPoint pMin, bsPoint pMax, bsVector N);
35 38  
36 39 scalarslice Mag();
... ... @@ -47,6 +50,7 @@ struct fieldslice
47 50  
48 51 //crop a region from the field
49 52 fieldslice crop(int u, int v, int su, int sv);
  53 + fieldslice& operator=(const fieldslice& rhs);
50 54  
51 55 void init_gpu();
52 56 void kill_gpu();
... ...
fileout.cu
... ... @@ -186,11 +186,21 @@ void fileoutStruct::Save(microscopeStruct* scope)
186 186 //save images of the fields in the microscope
187 187  
188 188 //if the user specifies an extended source
189   - if(scope->focalPoints.size() > 1)
  189 + if(scope->focalPoints.size() > 0)
190 190 {
191 191 //simulate the extended source and output the detector image
192 192 scope->SimulateExtendedSource();
193 193  
  194 + //saveNearField(&scope->nf);
  195 + saveFarField(scope);
  196 +
  197 + //save the detector images
  198 + saveDetector(scope);
  199 +
  200 + //simulate scattering for the last point (so that you have a near field image)
  201 + scope->SimulateScattering();
  202 + saveNearField(&scope->nf);
  203 +
194 204 }
195 205 else
196 206 {
... ... @@ -203,12 +213,15 @@ void fileoutStruct::Save(microscopeStruct* scope)
203 213 //run the far-field simulation
204 214 scope->SimulateImaging();
205 215  
  216 + //saveNearField(&scope->nf);
206 217 saveFarField(scope);
207 218  
  219 + //save the detector images
  220 + saveDetector(scope);
  221 +
208 222 }
209 223  
210   - //save the detector images
211   - saveDetector(scope);
  224 +
212 225  
213 226  
214 227 }
... ...
fileout.h
... ... @@ -5,7 +5,7 @@
5 5 //#include "defaults.h"
6 6 #include "dataTypes.h"
7 7  
8   -#include "colormap.h"
  8 +#include "rts/graphics/colormap.h"
9 9 #include "fieldslice.h"
10 10 #include "nearfield.h"
11 11 #include "microscope.h"
... ... @@ -34,7 +34,7 @@ struct fileoutStruct{
34 34 //image_source source;
35 35  
36 36 //color map info
37   - rts::colormap::colormapType colormap;
  37 + rts::colormapType colormap;
38 38 ptype colorMax;
39 39  
40 40 void Save(microscopeStruct* scope);
... ...
main.cpp
... ... @@ -24,6 +24,7 @@ microscopeStruct* SCOPE;
24 24 #include "warnings.h"
25 25  
26 26 fileoutStruct gFileOut;
  27 +bool verbose = false;
27 28 using namespace std;
28 29  
29 30 int cbessjyva(double v,complex<double> z,double &vm,complex<double>*cjv,
... ... @@ -31,32 +32,19 @@ int cbessjyva(double v,complex&lt;double&gt; z,double &amp;vm,complex&lt;double&gt;*cjv,
31 32  
32 33 int main(int argc, char *argv[])
33 34 {
34   - //test Envi loading and saving
35   - //EnviFile envi("testenvi", "w");
36   -
37   - //float* data = (float*)malloc(sizeof(float) * 100 * 100);
38   - //envi.addBand(data, 100, 100, 100);
39   -
40   - //envi.close();
41   -
42   - //return 0;
43 35  
44 36 SCOPE = new microscopeStruct();
45 37  
46   - cout<<SCOPE->nf.Uf.R[0]<<endl;
47   -
48 38 LoadParameters(argc, argv);
49 39  
50   - //TestSimulation(NF, SCOPE, &gFileOut);
51   -
52 40 //initialize GPU memory for fields
53 41 SCOPE->init();
54 42  
55   - OutputOptions();
56   -
57 43 gFileOut.Save(SCOPE);
58 44  
59   - //NF->destroy();
  45 + if(verbose)
  46 + OutputOptions();
  47 +
60 48 SCOPE->destroy();
61 49  
62 50  
... ...
microscope.cu
... ... @@ -4,7 +4,7 @@
4 4 #include "rts/tools/progressbar.h"
5 5 #include "rts/cuda/timer.h"
6 6 #include "dataTypes.h"
7   -#include "colormap.h"
  7 +#include "rts/graphics/colormap.h"
8 8  
9 9 #include <QImage>
10 10  
... ... @@ -112,8 +112,8 @@ void microscopeStruct::getFarField()
112 112 //Compute the Far Field image of the focal plane
113 113  
114 114 //clear the memory from previous detector fields
115   - Ud.kill_gpu();
116   - Ufd.kill_gpu();
  115 + //Ud.kill_gpu();
  116 + //Ufd.kill_gpu();
117 117  
118 118 //first crop the filtered near-field image of the source and scattered fields
119 119 Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]);
... ... @@ -261,9 +261,14 @@ void microscopeStruct::SimulateExtendedSource()
261 261 t += gpuStopTimer();
262 262  
263 263 rtsProgressBar((double)(i+1)/(double)npts * 100);
  264 + //unsigned char c;
  265 + //cin>>c;
264 266 }
265   - cout<<endl;
266   - cout<<"Time per source: "<<t/npts<<"ms"<<endl;
  267 + if(verbose)
  268 + {
  269 + cout<<endl;
  270 + cout<<"Time per source: "<<t/npts<<"ms"<<endl;
  271 + }
267 272  
268 273 }
269 274  
... ... @@ -304,3 +309,15 @@ void microscopeStruct::LoadExtendedSource(std::string filename)
304 309 }
305 310 }
306 311 }
  312 +
  313 +std::string microscopeStruct::toStr()
  314 +{
  315 + stringstream ss;
  316 + ss<<nf.toStr();
  317 +
  318 + ss<<"----------Optics--------------"<<endl<<endl;
  319 + ss<<"Objective NA: "<<objective[0]<<" to "<<objective[1]<<endl;
  320 + return ss.str();
  321 +
  322 +
  323 +}
... ...
microscope.h
... ... @@ -63,6 +63,8 @@ struct microscopeStruct
63 63 scalarslice getTransmittance();
64 64 scalarslice getIntensity();
65 65  
  66 + string toStr();
  67 +
66 68  
67 69  
68 70 };
... ...
montecarlo.cpp
... ... @@ -35,18 +35,12 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
35 35 ptype inPhi = asin(NAin);
36 36 ptype outPhi = asin(NAout);
37 37  
38   - //cout<<"inPhi: "<<inPhi<<endl;
39   - //cout<<"outPhi: "<<outPhi<<endl;
40   -
41 38 //calculate the z-values associated with these angles
42 39 ptype inZ = cos(inPhi);
43 40 ptype outZ = cos(outPhi);
44 41  
45 42 ptype rangeZ = inZ - outZ;
46 43  
47   - //cout<<"inZ: "<<inZ<<endl;
48   - //cout<<"outZ: "<<outZ<<endl;
49   -
50 44 //draw a distribution of random phi, z values
51 45 ptype z, phi, theta;
52 46 for(int i=0; i<N; i++)
... ... @@ -58,7 +52,6 @@ void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout)
58 52 phi = acos(z);
59 53  
60 54 //compute and store cartesian coordinates
61   - //bsVector spherical(1, theta + kSph[1], phi + kSph[2]);
62 55 bsVector spherical(1, theta, phi);
63 56 bsVector cart = spherical.sph2cart();
64 57 samples[i] = rotation * cart;
... ...
nearfield.cpp
1 1 #include "nearfield.h"
  2 +#include <time.h>
  3 +#include <math.h>
  4 +
  5 +#ifdef _WIN32
  6 +#define isnan(x) _isnan(x)
  7 +#define isinf(x) (!_finite(x))
  8 +#endif
  9 +
  10 +int bessjyv_sph(int v, double z, double &vm, double* cjv,
  11 + double* cyv, double* cjvp, double* cyvp);
2 12  
3 13 nearfieldStruct::nearfieldStruct()
4 14 {
5 15 scalarSim = true;
6 16 planeWave = false;
  17 + lut_us = true;
  18 + lut_uf = false;
7 19  
8 20 nWaves = 0;
9 21 }
... ... @@ -46,6 +58,8 @@ std::string nearfieldStruct::toStr()
46 58 ss<<"Condenser NA: "<<condenser[0]<<" to "<<condenser[1]<<std::endl;
47 59 ss<<"Focal Point: "<<focus[0]<<", "<<focus[1]<<", "<<focus[2]<<std::endl;
48 60 ss<<"Field Slice: "<<std::endl;
  61 + if(lut_us)
  62 + ss<<"LUT Parameters --- min: "<<d_min<<" max: "<<d_max<<std::endl;
49 63 ss<<pos<<std::endl;
50 64  
51 65 ss<<std::endl<<"---------Materials-----------"<<std::endl;
... ... @@ -61,6 +75,10 @@ std::string nearfieldStruct::toStr()
61 75 for(unsigned int s=0; s<sVector.size(); s++)
62 76 ss<<sVector[s].toStr()<<std::endl;
63 77  
  78 + ss<<"---------Timings-------------"<<std::endl;
  79 + ss<<"Uf = "<<t_Uf<<"ms"<<std::endl;
  80 + ss<<"Us = "<<t_Us<<"ms"<<std::endl;
  81 +
64 82 return ss.str();
65 83 }
66 84  
... ... @@ -70,7 +88,8 @@ void nearfieldStruct::calcWaves()
70 88 inWaves.resize(nWaves);
71 89  
72 90 //re-seed the random number generator
73   - //srand(seed);
  91 + //srand(time(NULL));
  92 + srand(NULL);
74 93  
75 94 //calculate the monte-carlo samples
76 95 mcSampleNA(&inWaves[0], nWaves, k, condenser[0], condenser[1]);
... ... @@ -84,6 +103,8 @@ void nearfieldStruct::calcSpheres()
84 103 //calculate all of the constants necessary to evaluate the scattered field
85 104 //estimate the order required to represent the scattered field for each sphere
86 105  
  106 +
  107 +
87 108 //for each sphere
88 109 for(int i=0; i<sVector.size(); i++)
89 110 {
... ... @@ -91,12 +112,10 @@ void nearfieldStruct::calcSpheres()
91 112  
92 113 //calculate the required order
93 114 sVector[i].calcNl(lambda);
94   - //std::cout<<sVector[i].Nl<<std::endl;
95 115  
96 116 //set the refractive index for the sphere
97 117 int imat = sVector[i].iMaterial;
98 118 rts::rtsComplex<ptype> n = mVector[imat](lambda);
99   - //std::cout<<"Sphere refractive index: "<<n<<std::endl;
100 119  
101 120 //calculate the scattering coefficients
102 121 sVector[i].calcCoeff(lambda, n);
... ... @@ -104,18 +123,109 @@ void nearfieldStruct::calcSpheres()
104 123 //save the refractive index
105 124 sVector[i].n = n;
106 125  
  126 + //if the LUT is used, calculate Usp(theta, r)
  127 + if(lut_us)
  128 + {
  129 + sVector[i].calcUp(lambda, n, pos, max(U.R[0], U.R[1]));
  130 + }
  131 +
  132 +
107 133 }
108 134  
109 135 }
110 136  
  137 +void nearfieldStruct::calcUs()
  138 +{
  139 +
  140 +
  141 + if(lut_us)
  142 + scalarUpLut();
  143 + else
  144 + scalarUs();
  145 +}
  146 +
  147 +void nearfieldStruct::calcUf()
  148 +{
  149 + if(lut_uf)
  150 + scalarUfLut();
  151 + else
  152 + scalarUf();
  153 +}
  154 +
111 155 void nearfieldStruct::Simulate()
112 156 {
  157 + //initialize timings
  158 + t_Uf = 0;
  159 + t_Us = 0;
  160 +
113 161 //compute a set of plane waves for Monte-Carlo simulation
114 162 calcWaves();
115 163  
116 164 //the near field has to be simulated no matter what the output rtsPoint is
117   - scalarUf();
  165 + calcUf();
118 166 calcSpheres();
119   - scalarUs();
  167 + calcUs();
120 168 sumUf();
  169 +
  170 + //U.Mag().toImage("testU.bmp");
  171 +}
  172 +
  173 +void nearfieldStruct::calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR)
  174 +{
  175 + /*Compute the look-up-table for spherical bessel functions used for the incident field
  176 + j = (Nl + 1) x aR array of values
  177 + aR = resolution of j
  178 + */
  179 +
  180 + //compute the wavenumber
  181 + ptype k = 2 * PI / lambda;
  182 + unsigned int Nl = m;
  183 +
  184 + //allocate space for the Bessel functions of the first and second kind (and derivatives -- which will be ignored)
  185 + int bytes = sizeof(double) * (Nl + 1);
  186 + double* cjv_kd = (double*)malloc(bytes);
  187 + double* cyv_kd = (double*)malloc(bytes);
  188 + double* cjvp_kd = (double*)malloc(bytes);
  189 + double* cyvp_kd = (double*)malloc(bytes);
  190 +
  191 + //compute the bessel functions using the CPU-based algorithm
  192 + double vm;
  193 +
  194 + //for each sample along r
  195 + ptype dr = (d_max - d_min) / (dR - 1);
  196 + ptype d;
  197 + ptype jv;
  198 + for(int id = 0; id < dR; id++)
  199 + {
  200 + d = id * dr + d_min;
  201 + double kd = k*d;
  202 + bessjyv_sph(Nl, kd, vm, cjv_kd, cyv_kd, cjvp_kd, cyvp_kd);
  203 +
  204 + //copy the double data to the bsComplex array
  205 + for(int l=0; l<=Nl; l++)
  206 + {
  207 + jv = cjv_kd[l];
  208 + if(isnan(jv) || isinf(jv))
  209 + {
  210 + if(kd == 0 && l == 0)
  211 + jv = 1;
  212 + else
  213 + jv = 0;
  214 + }
  215 + j[id * (Nl+1) + l] = jv;
  216 + }
  217 + }
  218 +
  219 + /*ofstream outfile("uf_besselout.txt");
  220 + for(int ir = 0; ir < dR; ir++)
  221 + {
  222 + outfile<<ir*dr + d_min<<endl;
  223 + for(int l = 0; l<=Nl; l++)
  224 + {
  225 + outfile<<j[ir * (Nl+1) + l]<<" --";
  226 + }
  227 + outfile<<endl;
  228 + }
  229 + outfile.close();*/
  230 +
121 231 }
... ...
nearfield.h
... ... @@ -31,6 +31,8 @@ struct nearfieldStruct
31 31  
32 32 //slices for the focused field
33 33 fieldslice Uf;
  34 + ptype d_min, d_max;
  35 +
34 36 // and total field: Uf + sum(Us)
35 37 fieldslice U;
36 38  
... ... @@ -43,6 +45,14 @@ struct nearfieldStruct
43 45 //flag for a plane wave
44 46 bool planeWave;
45 47  
  48 + //flag for using a LUT
  49 + bool lut_uf;
  50 + bool lut_us;
  51 +
  52 + //timings
  53 + float t_Uf;
  54 + float t_Us;
  55 +
46 56  
47 57  
48 58 //---------Scatterers------------
... ... @@ -78,10 +88,17 @@ struct nearfieldStruct
78 88 void setPos(bsPoint pMin, bsPoint pMax, bsVector normal);
79 89  
80 90 //this function re-computes the focused field
  91 + void calcUf();
81 92 void scalarUf();
  93 + void scalarUfLut();
  94 +
  95 + void calcBesselLut(ptype* j, ptype d_min, ptype d_max, int dR);
82 96  
83 97 //compute the field scattered by all of the materials
  98 + void calcUs();
84 99 void scalarUs();
  100 + void scalarUpLut();
  101 +
85 102  
86 103 //add the incident field to the sum of scattered fields
87 104 void sumUf();
... ...
nfScalarUf.cu
... ... @@ -5,7 +5,7 @@
5 5 #include "rts/cuda/error.h"
6 6 #include "rts/cuda/timer.h"
7 7  
8   -
  8 +//Incident field for a single plane wave
9 9 __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR)
10 10 {
11 11 /*Compute the scalar focused field using Debye focusing
... ... @@ -41,7 +41,8 @@ __global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, p
41 41 Uf[i] = exp(d) * A;
42 42  
43 43 }
44   -
  44 +
  45 +//Incident field for a focused point source
45 46 __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4)
46 47 {
47 48 /*Compute the scalar focused field using Debye focusing
... ... @@ -151,7 +152,6 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
151 152 }
152 153  
153 154 sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
154   - //sumUf += il * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
155 155  
156 156 il *= im;
157 157 }
... ... @@ -162,21 +162,12 @@ __global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, pt
162 162  
163 163 void nearfieldStruct::scalarUf()
164 164 {
165   - //Compute the incident field via a scalar simulation
166   - //This method uses Debye focusing to approximate the field analytically
167   -
168   - //time the calculation of the focused field
169   - //gpuStartTimer();
170   -
171   - //set the field slice to a scalar field
172   - //Uf.scalarField = true;
173   -
174   - //initialize the GPU arrays
175   - //Uf.init_gpu();
  165 +
  166 + gpuStartTimer();
176 167  
177 168 //create one thread for each pixel of the field slice
178 169 dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
179   - dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  170 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
180 171  
181 172 //if we are computing a plane wave, call the gpuScalarUfp function
182 173 if(planeWave)
... ... @@ -191,10 +182,7 @@ void nearfieldStruct::scalarUf()
191 182 ptype cosBeta = cos(asin(condenser[1]));
192 183 //compute the scalar Uf field (this will be in the x_hat channel of Uf)
193 184 gpuScalarUf<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m);
194   - }
195   -
196   - //float t = gpuStopTimer();
197   - //std::cout<<"Scalar Uf Time: "<<t<<"ms"<<std::endl;
198   - //std::cout<<focus<<std::endl;
199   -
  185 + }
  186 +
  187 + t_Uf = gpuStopTimer();
200 188 }
... ...
nfScalarUfLut.cu 0 โ†’ 100644
  1 +#include "nearfield.h"
  2 +
  3 +#include "rts/math/legendre.h"
  4 +#include "rts/cuda/error.h"
  5 +#include "rts/cuda/timer.h"
  6 +
  7 +texture<float, cudaTextureType2D> texJ;
  8 +
  9 +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR);
  10 +
  11 +__global__ void gpuScalarUfLut(bsComplex* Uf, bsRect ABCD, int uR, int vR, bsPoint f, bsVector k, ptype A, ptype cosAlpha, ptype cosBeta, int nl, ptype dmin, ptype dmax, int dR)
  12 +{
  13 + /*This function computes the focused field for a 2D slice
  14 +
  15 + Uf = destination field slice
  16 + ABCD = plane representing the field slice in world space
  17 + uR, vR = resolution of the Uf field
  18 + f = focal point of the condenser
  19 + k = direction of the incident light
  20 + A = amplitude of the incident field
  21 + cosAlpha= cosine of the solid angle subtended by the condenser obscuration
  22 + cosBeta = cosine of the solid angle subtended by the condenser aperature
  23 + nl = number of orders used to compute the field
  24 + dR = number of Bessel function values in the look-up texture
  25 +
  26 + */
  27 +
  28 + //get the current coordinate in the plane slice
  29 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  30 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  31 +
  32 + //make sure that the thread indices are in-bounds
  33 + if(iu >= uR || iv >= vR) return;
  34 +
  35 + //compute the index (easier access to the scalar field array)
  36 + int i = iv*uR + iu;
  37 +
  38 + //compute the parameters for u and v
  39 + ptype u = (ptype)iu / (uR);
  40 + ptype v = (ptype)iv / (vR);
  41 +
  42 +
  43 +
  44 + //get the rtsPoint in world space and then the r vector
  45 + bsPoint p = ABCD(u, v);
  46 + bsVector r = p - f;
  47 + ptype d = r.len();
  48 +
  49 + if(d == 0)
  50 + {
  51 + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta);
  52 + return;
  53 + }
  54 +
  55 + //get info for the light direction and frequency
  56 + r = r.norm();
  57 +
  58 + //compute the imaginary factor i^l
  59 + bsComplex im = bsComplex(0, 1);
  60 + bsComplex il = bsComplex(1, 0);
  61 +
  62 + //Legendre functions are computed dynamically to save memory
  63 + //initialize the Legendre functions
  64 +
  65 + ptype P[2];
  66 + //get the angle between k and r (light direction and position vector)
  67 + ptype cosTheta;
  68 + cosTheta = k.dot(r);
  69 +
  70 + rts::init_legendre<ptype>(cosTheta, P[0], P[1]);
  71 +
  72 + //initialize legendre functions for the cassegrain angles
  73 + ptype Palpha[3];
  74 + rts::init_legendre<ptype>(cosAlpha, Palpha[0], Palpha[1]);
  75 + Palpha[2] = 1;
  76 +
  77 + ptype Pbeta[3];
  78 + rts::init_legendre<ptype>(cosBeta, Pbeta[0], Pbeta[1]);
  79 + Pbeta[2] = 1;
  80 +
  81 + //for each order l
  82 + bsComplex sumUf(0.0, 0.0);
  83 + ptype jl = 0.0;
  84 + ptype Pl;
  85 + ptype di = ( (d - dmin)/(dmax - dmin) ) * (dR - 1);
  86 + for(int l = 0; l<=nl; l++)
  87 + {
  88 + jl = tex2D(texJ, l + 0.5, di + 0.5);
  89 + if(l==0)
  90 + Pl = P[0];
  91 + else if(l==1)
  92 + {
  93 + Pl = P[1];
  94 +
  95 + //adjust the cassegrain Legendre function
  96 + Palpha[2] = Palpha[0];
  97 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  98 + Pbeta[2] = Pbeta[0];
  99 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  100 + }
  101 + else
  102 + {
  103 + rts::shift_legendre<ptype>(l, cosTheta, P[0], P[1]);
  104 +
  105 + Pl = P[1];
  106 +
  107 + //adjust the cassegrain outer Legendre function
  108 + Palpha[2] = Palpha[0];
  109 + rts::shift_legendre<ptype>(l+1, cosAlpha, Palpha[0], Palpha[1]);
  110 + Pbeta[2] = Pbeta[0];
  111 + rts::shift_legendre<ptype>(l+1, cosBeta, Pbeta[0], Pbeta[1]);
  112 + }
  113 +
  114 + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]);
  115 + //sumUf += jl;
  116 +
  117 + il *= im;
  118 + }
  119 +
  120 + Uf[i] = sumUf * 2 * PI * A;
  121 + //Uf[i] = u;
  122 + //return;
  123 +}
  124 +
  125 +void nearfieldStruct::scalarUfLut()
  126 +{
  127 + gpuStartTimer();
  128 +
  129 + //calculate the minimum and maximum points in the focused field
  130 + d_min = pos.dist(focus);
  131 + d_max = pos.dist_max(focus);
  132 +
  133 + //allocate space for the Bessel function
  134 + int dR = 2 * max(Uf.R[0], Uf.R[1]);
  135 + ptype* j = NULL;
  136 + j = (ptype*) malloc(sizeof(ptype) * dR * (m+1));
  137 +
  138 + //calculate Bessel function LUT
  139 + calcBesselLut(j, d_min, d_max, dR);
  140 +
  141 + //create a CUDA array structure and specify the format description
  142 + cudaArray* arrayJ;
  143 + cudaChannelFormatDesc channelDesc =
  144 + cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
  145 +
  146 + //allocate memory
  147 + HANDLE_ERROR(cudaMallocArray(&arrayJ, &channelDesc, m+1, dR));
  148 +
  149 + //specify texture properties
  150 + texJ.addressMode[0] = cudaAddressModeMirror;
  151 + texJ.addressMode[1] = cudaAddressModeMirror;
  152 + texJ.filterMode = cudaFilterModeLinear;
  153 + texJ.normalized = false;
  154 +
  155 + //bind the texture to the array
  156 + HANDLE_ERROR(cudaBindTextureToArray(texJ, arrayJ, channelDesc));
  157 +
  158 + //copy the CPU Bessel LUT to the GPU-based array
  159 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayJ, 0, 0, j, (m+1)*sizeof(float), (m+1)*sizeof(float), dR, cudaMemcpyHostToDevice));
  160 +
  161 + //----------------Compute the focused field
  162 + //create one thread for each pixel of the field slice
  163 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  164 + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  165 +
  166 + //if we are computing a plane wave, call the gpuScalarUfp function
  167 + if(planeWave)
  168 + {
  169 + gpuScalarUfp<<<dimGrid, dimBlock>>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]);
  170 + }
  171 + //otherwise compute the condenser info and create a focused field
  172 + else
  173 + {
  174 + //pre-compute the cosine of the obscuration and objective angles
  175 + ptype cosAlpha = cos(asin(condenser[0]));
  176 + ptype cosBeta = cos(asin(condenser[1]));
  177 + //compute the scalar Uf field (this will be in the x_hat channel of Uf)
  178 + gpuScalarUfLut<<<dimGrid, dimBlock>>>(Uf.x_hat, pos, Uf.R[0], Uf.R[1], focus, k, A, cosAlpha, cosBeta, m, d_min, d_max, dR);
  179 + }
  180 +
  181 +
  182 + //free everything
  183 + free(j);
  184 +
  185 + HANDLE_ERROR(cudaFreeArray(arrayJ));
  186 +
  187 + t_Uf = gpuStopTimer();
  188 +}
... ...
nfScalarUpLut.cu 0 โ†’ 100644
  1 +#include "nearfield.h"
  2 +#include "rts/math/spherical_bessel.h"
  3 +#include "rts/math/legendre.h"
  4 +#include <stdlib.h>
  5 +#include "rts/cuda/error.h"
  6 +#include "rts/cuda/timer.h"
  7 +
  8 +texture<float2, cudaTextureType2D> texUsp;
  9 +texture<float2, cudaTextureType2D> texUip;
  10 +
  11 +__global__ void gpuScalarUpLut(bsComplex* Us, bsVector* k, int nk, ptype kmag, ptype a, ptype dmin, ptype dmax, bsPoint f, bsPoint ps, ptype A, bsRect ABCD, int uR, int vR, int dR, int aR, int thetaR)
  12 +{
  13 + /*This function uses Monte-Carlo integration to sample a texture-based LUT describing the scattered field
  14 + produced by a plane wave through a sphere. The MC sampling is used to approximate a focused field.
  15 +
  16 + Us = final scattered field
  17 + k = list of incoming plane waves (Monte-Carlo samples)
  18 + nk = number of incoming MC samples
  19 + kmag= magnitude of the incoming field 2pi/lambda
  20 + dmin= minimum distance of the Usp texture
  21 + dmax= maximum distance of the Usp texture
  22 + f = position of the focus
  23 + ps = position of the sphere
  24 + A = total amplitude of the incident field arriving at the focal spot
  25 + ABCD= rectangle representing the field slice
  26 + uR = resolution of the field slice in the u direction
  27 + vR = resolution of the field slice in the v direction
  28 + dR = resolution of the Usp texture in the d direction
  29 + thetaR= resolution of the Usp texture in the theta direction
  30 + */
  31 +
  32 + //get the current coordinate in the plane slice
  33 + int iu = blockIdx.x * blockDim.x + threadIdx.x;
  34 + int iv = blockIdx.y * blockDim.y + threadIdx.y;
  35 +
  36 + //make sure that the thread indices are in-bounds
  37 + if(iu >= uR || iv >= vR) return;
  38 +
  39 + //compute the index (easier access to the scalar field array)
  40 + int i = iv*uR + iu;
  41 +
  42 + //compute the parameters for u and v
  43 + ptype u = (ptype)iu / (uR);
  44 + ptype v = (ptype)iv / (vR);
  45 +
  46 + //get the rtsPoint in world space and then the r vector
  47 + bsPoint p = ABCD(u, v);
  48 + bsVector r = p - ps;
  49 + ptype d = r.len();
  50 + float di = ( (d - max(a, dmin))/(dmax - max(a, dmin)) ) * (dR - 1);
  51 + float ai = ( (d - dmin)/(a - dmin)) * (aR - 1);
  52 +
  53 + bsComplex sumUs(0, 0);
  54 + //for each plane wave in the wave list
  55 + for(int iw = 0; iw < nk; iw++)
  56 + {
  57 + //normalize the direction vectors and find their inner product
  58 + r = r.norm();
  59 + ptype cos_theta = k[iw].dot(r);
  60 + if(cos_theta < -1)
  61 + cos_theta = -1;
  62 + if(cos_theta > 1)
  63 + cos_theta = 1;
  64 + float thetai = ( acos(cos_theta) / PI ) * (thetaR - 1);
  65 +
  66 + //compute the phase factor for spheres that are not at the origin
  67 + bsVector c = ps - f;
  68 + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c)));
  69 +
  70 + //compute the internal field if we are inside a sphere
  71 + if(d < a)
  72 + {
  73 + float2 Uip = tex2D(texUip, ai + 0.5, thetai + 0.5);
  74 + sumUs += (1.0/nk) * A * phase * bsComplex(Uip.x, Uip.y);
  75 + }
  76 + //otherwise compute the scattered field
  77 + else
  78 + {
  79 + float2 Usp = tex2D(texUsp, di + 0.5, thetai + 0.5);
  80 + sumUs += (1.0/nk) * A * phase * bsComplex(Usp.x, Usp.y);
  81 + }
  82 +
  83 + }
  84 +
  85 + Us[i] += sumUs;
  86 +}
  87 +
  88 +void nearfieldStruct::scalarUpLut()
  89 +{
  90 + //get the number of spheres
  91 + int nSpheres = sVector.size();
  92 +
  93 + //if there are no spheres, nothing to do here
  94 + if(nSpheres == 0)
  95 + return;
  96 +
  97 + //time the calculation of the focused field
  98 + gpuStartTimer();
  99 +
  100 + //clear the scattered field
  101 + U.clear_gpu();
  102 +
  103 + //create one thread for each pixel of the field slice
  104 + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK);
  105 + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK);
  106 +
  107 + //copy Monte-Carlo samples to the GPU and determine the incident amplitude (plane-wave specific stuff)
  108 + bsVector* gpuk;
  109 + int nWaves;
  110 + ptype subA;
  111 + if(planeWave)
  112 + {
  113 + nWaves = 1;
  114 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) ) );
  115 + HANDLE_ERROR(cudaMemcpy( gpuk, &k, sizeof(bsVector), cudaMemcpyHostToDevice));
  116 + subA = A;
  117 + }
  118 + else
  119 + {
  120 + nWaves = inWaves.size();
  121 + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * nWaves ) );
  122 + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * nWaves, cudaMemcpyHostToDevice));
  123 + //compute the amplitude that makes it through the condenser
  124 + subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) );
  125 + }
  126 +
  127 + //for each sphere
  128 + for(int s = 0; s<nSpheres; s++)
  129 + {
  130 + //get the current sphere
  131 + //sphere S = sVector[s];
  132 +
  133 + //allocate space for the Usp and Uip textures
  134 + //allocate the cuda array
  135 + cudaArray* arrayUsp;
  136 + cudaArray* arrayUip;
  137 + cudaChannelFormatDesc channelDescUsp =
  138 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  139 + cudaChannelFormatDesc channelDescUip =
  140 + cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
  141 + int dR = sVector[s].Usp.R[0];
  142 + int thetaR = sVector[s].Usp.R[1];
  143 + int aR = sVector[s].Uip.R[0];
  144 + HANDLE_ERROR(cudaMallocArray(&arrayUsp, &channelDescUsp, dR, thetaR));
  145 + HANDLE_ERROR(cudaMallocArray(&arrayUip, &channelDescUip, aR, thetaR));
  146 +
  147 + texUsp.addressMode[0] = cudaAddressModeMirror;
  148 + texUsp.addressMode[1] = cudaAddressModeMirror;
  149 + texUsp.filterMode = cudaFilterModeLinear;
  150 + texUsp.normalized = false;
  151 +
  152 + texUip.addressMode[0] = cudaAddressModeMirror;
  153 + texUip.addressMode[1] = cudaAddressModeMirror;
  154 + texUip.filterMode = cudaFilterModeLinear;
  155 + texUip.normalized = false;
  156 + HANDLE_ERROR(cudaBindTextureToArray(texUsp, arrayUsp, channelDescUsp));
  157 + HANDLE_ERROR(cudaBindTextureToArray(texUip, arrayUip, channelDescUip));
  158 +
  159 + //copy the LUT to the Usp texture
  160 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUsp, 0, 0, sVector[s].Usp.x_hat, dR*sizeof(float2), dR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  161 + HANDLE_ERROR( cudaMemcpy2DToArray(arrayUip, 0, 0, sVector[s].Uip.x_hat, aR*sizeof(float2), aR*sizeof(float2), thetaR, cudaMemcpyDeviceToDevice));
  162 +
  163 + gpuScalarUpLut<<<dimGrid, dimBlock>>>(U.x_hat,
  164 + gpuk,
  165 + nWaves,
  166 + 2 * PI / lambda,
  167 + sVector[s].a,
  168 + sVector[s].d_min,
  169 + sVector[s].d_max,
  170 + focus,
  171 + sVector[s].p,
  172 + subA,
  173 + pos,
  174 + U.R[0],
  175 + U.R[1],
  176 + dR,
  177 + aR,
  178 + thetaR);
  179 +
  180 + cudaFreeArray(arrayUsp);
  181 + cudaFreeArray(arrayUip);
  182 +
  183 + }
  184 +
  185 +
  186 + //store the time to compute the scattered field
  187 + t_Us = gpuStopTimer();
  188 +
  189 + //free monte-carlo samples
  190 + cudaFree(gpuk);
  191 +
  192 +}
... ...
nfScalarUs.cu
... ... @@ -163,7 +163,7 @@ void nearfieldStruct::scalarUs()
163 163 return;
164 164  
165 165 //time the calculation of the focused field
166   - //gpuStartTimer();
  166 + gpuStartTimer();
167 167  
168 168 //clear the scattered field
169 169 U.clear_gpu();
... ... @@ -251,9 +251,8 @@ void nearfieldStruct::scalarUs()
251 251 }
252 252  
253 253  
  254 + //store the time to compute the scattered field
  255 + t_Us = gpuStopTimer();
254 256  
255   - //float t = gpuStopTimer();
256   - //std::cout<<"Scalar Us Time: "<<t<<"ms"<<std::endl;
257   - //std::cout<<focus<<std::endl;
258 257  
259 258 }
... ...
nfSumUf.cu
... ... @@ -32,7 +32,7 @@ __global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz,
32 32 {
33 33 r = p - ps[is];
34 34 d = r.len();
35   - if(d <= as[is])
  35 + if(d < as[is])
36 36 return;
37 37 }
38 38  
... ... @@ -110,8 +110,5 @@ void nearfieldStruct::sumUf()
110 110 HANDLE_ERROR(cudaFree(gpu_p));
111 111 HANDLE_ERROR(cudaFree(gpu_a));
112 112  
113   - //float t = gpuStopTimer();
114   - //std::cout<<"Add Us Time: "<<t<<"ms"<<std::endl;
115   - //std::cout<<focus<<std::endl;
116 113  
117 114 }
... ...
options.h
... ... @@ -5,7 +5,7 @@
5 5  
6 6 #include "nearfield.h"
7 7 #include "microscope.h"
8   -#include "colormap.h"
  8 +#include "rts/graphics/colormap.h"
9 9 #include "fileout.h"
10 10 //extern nearfieldStruct* NF;
11 11 extern microscopeStruct* SCOPE;
... ... @@ -23,7 +23,179 @@ using namespace std;
23 23 #include <boost/program_options.hpp>
24 24 namespace po = boost::program_options;
25 25  
26   -static void loadSpheres(string sphereList)
  26 +extern bool verbose;
  27 +
  28 +
  29 +
  30 +static void lNearfield(po::variables_map vm)
  31 +{
  32 + //test to see if we are simulating a plane wave
  33 + bool planeWave = DEFAULT_PLANEWAVE;
  34 + if(vm.count("plane-wave"))
  35 + planeWave = !planeWave;
  36 + SCOPE->nf.planeWave = planeWave;
  37 +
  38 + //get the incident field amplitude
  39 + SCOPE->nf.A = vm["amplitude"].as<ptype>();
  40 +
  41 + //get the condenser parameters
  42 + SCOPE->nf.condenser[0] = DEFAULT_CONDENSER_MIN;
  43 + SCOPE->nf.condenser[1] = DEFAULT_CONDENSER_MAX;
  44 +
  45 + if(vm.count("condenser"))
  46 + {
  47 + vector<ptype> cparams = vm["condenser"].as< vector<ptype> >();
  48 +
  49 + if(cparams.size() == 1)
  50 + SCOPE->nf.condenser[1] = cparams[0];
  51 + else
  52 + {
  53 + SCOPE->nf.condenser[0] = cparams[0];
  54 + SCOPE->nf.condenser[1] = cparams[1];
  55 + }
  56 + }
  57 +
  58 +
  59 + //get the focal rtsPoint position
  60 + SCOPE->nf.focus[0] = DEFAULT_FOCUS_X;
  61 + SCOPE->nf.focus[1] = DEFAULT_FOCUS_Y;
  62 + SCOPE->nf.focus[2] = DEFAULT_FOCUS_Z;
  63 + if(vm.count("focus"))
  64 + {
  65 + vector<ptype> fpos = vm["focus"].as< vector<ptype> >();
  66 + if(fpos.size() != 3)
  67 + {
  68 + cout<<"BIMSIM Error - the incident focal point is incorrectly specified; it must have three components."<<endl;
  69 + exit(1);
  70 + }
  71 + SCOPE->nf.focus[0] = fpos[0];
  72 + SCOPE->nf.focus[1] = fpos[1];
  73 + SCOPE->nf.focus[2] = fpos[2];
  74 + }
  75 +
  76 + //get the incident light direction (k-vector)
  77 + bsVector spherical(1, 0, 0);
  78 +
  79 + //if a k-vector is specified
  80 + if(vm.count("k"))
  81 + {
  82 + vector<ptype> kvec = vm["k"].as< vector<ptype> >();
  83 + if(kvec.size() != 2)
  84 + {
  85 + cout<<"BIMSIM Error - k-vector is not specified correctly: it must contain two elements"<<endl;
  86 + exit(1);
  87 + }
  88 + spherical[1] = kvec[0];
  89 + spherical[2] = kvec[1];
  90 + }
  91 + SCOPE->nf.k = spherical.sph2cart();
  92 +
  93 +
  94 + //incident field order
  95 + SCOPE->nf.m = vm["field-order"].as<int>();
  96 +
  97 + //number of Monte-Carlo samples
  98 + SCOPE->nf.nWaves = vm["samples"].as<int>();
  99 +
  100 + //random number seed for Monte-Carlo samples
  101 + if(vm.count("seed"))
  102 + srand(vm["seed"].as<unsigned int>());
  103 +
  104 +
  105 +
  106 +}
  107 +
  108 +
  109 +static void loadOutputParams(po::variables_map vm)
  110 +{
  111 + //append simulation results to previous binary files
  112 + gFileOut.append = DEFAULT_APPEND;
  113 + if(vm.count("append"))
  114 + gFileOut.append = true;
  115 +
  116 + //image parameters
  117 + //component of the field to be saved
  118 + std::string fieldStr;
  119 + fieldStr = vm["output-type"].as<string>();
  120 +
  121 + if(fieldStr == "magnitude")
  122 + gFileOut.field = fileoutStruct::fieldMag;
  123 + else if(fieldStr == "intensity")
  124 + gFileOut.field = fileoutStruct::fieldIntensity;
  125 + else if(fieldStr == "polarization")
  126 + gFileOut.field = fileoutStruct::fieldPolar;
  127 + else if(fieldStr == "imaginary")
  128 + gFileOut.field = fileoutStruct::fieldImag;
  129 + else if(fieldStr == "real")
  130 + gFileOut.field = fileoutStruct::fieldReal;
  131 + else if(fieldStr == "angular-spectrum")
  132 + gFileOut.field = fileoutStruct::fieldAngularSpectrum;
  133 +
  134 +
  135 + //image file names
  136 + gFileOut.intFile = vm["intensity"].as<string>();
  137 + gFileOut.absFile = vm["absorbance"].as<string>();
  138 + gFileOut.transFile = vm["transmittance"].as<string>();
  139 + gFileOut.nearFile = vm["near-field"].as<string>();
  140 + gFileOut.farFile = vm["far-field"].as<string>();
  141 +
  142 + //colormap
  143 + std::string cmapStr;
  144 + cmapStr = vm["colormap"].as<string>();
  145 + if(cmapStr == "brewer")
  146 + gFileOut.colormap = rts::cmBrewer;
  147 + else if(cmapStr == "gray")
  148 + gFileOut.colormap = rts::cmGrayscale;
  149 + else
  150 + cout<<"color-map value not recognized (using default): "<<cmapStr<<endl;
  151 +}
  152 +
  153 +void lFlags(po::variables_map vm, po::options_description desc)
  154 +{
  155 + //display help and exit
  156 + if(vm.count("help"))
  157 + {
  158 + cout<<desc<<endl;
  159 + exit(1);
  160 + }
  161 +
  162 + //flag for verbose output
  163 + if(vm.count("verbose"))
  164 + verbose = true;
  165 +
  166 + if(vm.count("recursive"))
  167 + {
  168 + SCOPE->nf.lut_us = false;
  169 + SCOPE->nf.lut_uf = false;
  170 + }
  171 + else if(vm.count("recursive-us"))
  172 + {
  173 + SCOPE->nf.lut_us = false;
  174 + }
  175 + else if(vm.count("lut-uf"))
  176 + {
  177 + SCOPE->nf.lut_uf = true;
  178 + }
  179 +}
  180 +
  181 +void lWavelength(po::variables_map vm)
  182 +{
  183 + //load the wavelength
  184 + if(vm.count("nu"))
  185 + {
  186 + //wavelength is given in wavenumber - transform and flag
  187 + SCOPE->nf.lambda = 10000/vm["nu"].as<ptype>();
  188 + gFileOut.wavenumber = true;
  189 + }
  190 + //otherwise we are using lambda = wavelength
  191 + else
  192 + {
  193 + SCOPE->nf.lambda = vm["lambda"].as<ptype>();
  194 + gFileOut.wavenumber = false;
  195 + }
  196 +}
  197 +
  198 +static void lSpheres(string sphereList)
27 199 {
28 200 /*This function loads a list of sphere given in the string sphereList
29 201 The format is:
... ... @@ -58,17 +230,60 @@ static void loadSpheres(string sphereList)
58 230 //check out the next element (this should set the EOF error flag)
59 231 ss.peek();
60 232 }
  233 +}
61 234  
  235 +void lSpheres(po::variables_map vm)
  236 +{
  237 + //if a sphere is specified at the command line
  238 + if(vm.count("spheres"))
  239 + {
  240 + //convert the sphere to a string
  241 + vector<ptype> sdesc = vm["spheres"].as< vector<ptype> >();
62 242  
  243 + //compute the number of spheres specified
  244 + unsigned int nS;
  245 + if(sdesc.size() <= 5)
  246 + nS = 1;
  247 + else
  248 + {
  249 + //if the number of parameters is divisible by 4, compute the number of spheres
  250 + if(sdesc.size() % 5 == 0)
  251 + nS = sdesc.size() / 5;
  252 + else
  253 + {
  254 + cout<<"BIMSIM Error: Invalid number of sphere parameters."<<endl;
  255 + exit(1);
  256 + }
  257 + }
63 258  
64   -}
  259 + stringstream ss;
  260 +
  261 + //for each sphere
  262 + for(unsigned int s=0; s<nS; s++)
  263 + {
  264 + //compute the number of sphere parameters
  265 + unsigned int nP;
  266 + if(nS == 1) nP = sdesc.size();
  267 + else nP = 5;
  268 +
  269 + //store each parameter as a string
  270 + for(unsigned int i=0; i<nP; i++)
  271 + {
  272 + ss<<sdesc[s*5 + i]<<" ";
  273 + }
  274 + ss<<endl;
  275 + }
  276 <