Blame view

stim/optics/scalarwave.h 13.3 KB
8e4f8364   David Mayerich   started a new opt...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
  #ifndef STIM_SCALARWAVE_H
  #define STIM_SCALARWAVE_H
  
  
  #include <string>
  #include <sstream>
  #include <cmath>
  
  //#include "../math/vector.h"
  #include "../math/vec3.h"
  #include "../math/quaternion.h"
  #include "../math/constants.h"
  #include "../math/plane.h"
  #include "../math/complex.h"
  
  //CUDA
  #include "../cuda/cudatools/devices.h"
  #include "../cuda/cudatools/error.h"
  #include "../cuda/sharedmem.cuh"
  
  namespace stim{
  
  template<typename T>
  class scalarwave{
  
  protected:
  
  	stim::vec3<T> k;							//k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  	stim::complex<T> E0;						//amplitude
  
  	/// Bend a plane wave via refraction, given that the new propagation direction is known
  	CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  		return scalarwave<T>(kn.norm() * kmag(), E0);
  	}
  
  public:
  
  	///constructor: create a plane wave propagating along k
  	CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  		k = kvec;
  		E0 = E;
  	}
  
  	CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  		k = vec3<T>(kx, ky, kz);
  		E0 = E;
  	}
  
  	///multiplication operator: scale E0
      CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){		
  		E0 = E0 * rhs;
  		return *this;
  	}
  
  	CUDA_CALLABLE T lambda() const{
  		return stim::TAU / k.len();
  	}
  
  	CUDA_CALLABLE T kmag() const{
  		return k.len();
  	}
  
  	CUDA_CALLABLE vec3< complex<T> > E(){
  		return E0;
  	}
  
  	CUDA_CALLABLE vec3<T> kvec(){
  		return k;
  	}
  
  	/// calculate the value of the field produced by the plane wave given a three-dimensional position
  	CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  		return pos( stim::vec3<T>(x, y, z) );
  	}
  
  	CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  		return E0 * exp(complex<T>(0, k.dot(p)));
  	}
  
  	//scales k based on a transition from material ni to material nt
  	CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  		return scalarwave<T>(k * (nt / ni), E0);
  	}
  
  	CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  		return bend(kn);
  	}
  
  	/// Calculate the result of a plane wave hitting an interface between two refractive indices
  
  	/// @param P is a plane representing the position and orientation of the surface
  	/// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  	/// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  	/// @param r is the reflected component of the plane wave
  	/// @param t is the transmitted component of the plane wave
  	void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  		scatter(P, n1/n0, r, t);
  	}
  
  	/// Calculate the scattering result when nr = n1/n0
  
  	/// @param P is a plane representing the position and orientation of the surface
  	/// @param r is the ration n1/n0
  	/// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  	/// @param r is the reflected component of the plane wave
  	/// @param t is the transmitted component of the plane wave
  	void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  		/*
  		int facing = P.face(k);		//determine which direction the plane wave is coming in
  
  		if(facing == -1){		//if the wave hits the back of the plane, invert the plane and nr
  			P = P.flip();			//flip the plane
  			nr = 1/nr;				//invert the refractive index (now nr = n0/n1)
  		}
  
  		//use Snell's Law to calculate the transmitted angle
  		T cos_theta_i = k.norm().dot(-P.norm());				//compute the cosine of theta_i
  		T theta_i = acos(cos_theta_i);							//compute theta_i
  		T sin_theta_t = (1/nr) * sin(theta_i);						//compute the sine of theta_t using Snell's law
  		T theta_t = asin(sin_theta_t);							//compute the cosine of theta_t
  
  		bool tir = false;						//flag for total internal reflection
  		if(theta_t != theta_t){
  			tir = true;
  			theta_t = stim::PI / (T)2;
  		}
  
  		//handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  		if(theta_i == 0){
  			T rp = (1 - nr) / (1 + nr);		//compute the Fresnel coefficients
  			T tp = 2 / (1 + nr);
  			vec3<T> kr = -k;
  			vec3<T> kt = k * nr;			//set the k vectors for theta_i = 0
  			vec3< complex<T> > Er = E0 * rp;		//compute the E vectors
  			vec3< complex<T> > Et = E0 * tp;
  			T phase_t = P.p().dot(k - kt);	//compute the phase offset
  			T phase_r = P.p().dot(k - kr);
  
  			//create the plane waves
  			r = planewave<T>(kr, Er, phase_r);
  			t = planewave<T>(kt, Et, phase_t);
  			return;
  		}
  
  
  		//compute the Fresnel coefficients
  		T rp, rs, tp, ts;
  		rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  		rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  		
  		if(tir){
  			tp = ts = 0;
  		}
  		else{
  			tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  			ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  		}
  
  		//compute the coordinate space for the plane of incidence
  		vec3<T> z_hat = -P.norm();
  		vec3<T> y_hat = P.parallel(k).norm();
  		vec3<T> x_hat = y_hat.cross(z_hat).norm();
  
  		//compute the k vectors for r and t
  		vec3<T> kr, kt;
  		kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  		kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  
  		//compute the magnitude of the p- and s-polarized components of the incident E vector
  		complex<T> Ei_s = E0.dot(x_hat);
  		int sgn = E0.dot(y_hat).sgn();
  		vec3< complex<T> > cx_hat = x_hat;
  		complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  		//compute the magnitude of the p- and s-polarized components of the reflected E vector
  		complex<T> Er_s = Ei_s * rs;
  		complex<T> Er_p = Ei_p * rp;
  		//compute the magnitude of the p- and s-polarized components of the transmitted E vector
  		complex<T> Et_s = Ei_s * ts;
  		complex<T> Et_p = Ei_p * tp;
  
  		//compute the reflected E vector
  		vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  		//compute the transmitted E vector
  		vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  
  		T phase_t = P.p().dot(k - kt);
  		T phase_r = P.p().dot(k - kr);
  
  		//create the plane waves
  		r.k = kr;
  		r.E0 = Er * exp( complex<T>(0, phase_r) );
  
  		t.k = kt;
  		t.E0 = Et * exp( complex<T>(0, phase_t) );
  		*/
  	}
  
  	std::string str()
  	{
  		std::stringstream ss;
  		ss<<"Plane Wave:"<<std::endl;
  		ss<<"	"<<E0<<" e^i ( "<<k<<" . r )";
  		return ss.str();
  	}
  };					//end planewave class
  
  
  /// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations
  template<typename T>
  __global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  	extern __shared__ stim::scalarwave<T> shared_W[];		//declare the list of waves in shared memory
  
  	stim::cuda::sharedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x);	//copy the plane waves into shared memory for faster access
  	__syncthreads();															//synchronize threads to insure all data is copied
  
  	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array
  	T px, py, pz;
  	(x == NULL) ? px = 0 : px = x[i];								// test for NULL values and set positions
  	(y == NULL) ? py = 0 : py = y[i];
  	(z == NULL) ? pz = 0 : pz = z[i];
  	
  	stim::complex<T> f = 0;											//create a register to store the result
  	for(size_t w = 0; w < n_waves; w++)
  		f += shared_W[w].pos(px, py, pz);							//evaluate the plane wave
  	F[i] += f;														//copy the result to device memory
  }
  
  /// evaluate a scalar wave at several points, where all arrays are on the GPU
  template<typename T>
  void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  	
  	int threads = stim::maxThreadsPerBlock();			//get the maximum number of threads per block for the CUDA device
  	dim3 blocks(N / threads + 1);						//calculate the optimal number of blocks
  	cuda_scalarwave<T><<< blocks, threads >>>(F, N, x, y, z, w);			//call the kernel
  }
  
  /// Sums a series of coherent plane waves at a specified point
  /// @param field is the output array of field values corresponding to each input point
  /// @param x is an array of x coordinates for the field point
  /// @param y is an array of y coordinates for the field point
  /// @param z is an array of z coordinates for the field point
  /// @param N is the number of points in the input and output arrays
  /// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  /// @param A is the list of amplitudes for each wave
  /// @param S is the list of propagation directions for each wave
  template<typename T>
  void cpu_sum_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > w_array){
  	size_t S = w_array.size();											//store the number of waves
  #ifdef NO_CUDA
  	memset(F, 0, N * sizeof(stim::complex<T>));
  	T px, py, pz;
  	for(size_t i = 0; i < N; i++){										// for each element in the array
  		(x == NULL) ? px = 0 : px = x[i];								// test for NULL values
  		(y == NULL) ? py = 0 : py = y[i];
  		(z == NULL) ? pz = 0 : pz = z[i];
  
  		for(size_t s = 0; s < S; s++){
  			F[i] += w_array[s].pos(px, py, pz);						//sum all plane waves at this point
  		}
  	}
  #else
  	stim::complex<T>* dev_F;										//allocate space for the field
  	cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
  	cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>));				//set the field to zero (necessary because a sum is used)
  
  	T* dev_x = NULL;												//allocate space and copy the X coordinate (if specified)
  	if(x != NULL){
  		HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  		HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  	}
  
  	T* dev_y = NULL;												//allocate space and copy the Y coordinate (if specified)
  	if(y != NULL){
  		HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  		HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  	}
  
  	T* dev_z = NULL;												//allocate space and copy the Z coordinate (if specified)
  	if(z != NULL){
  		HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  		HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  	}
  
  	size_t wave_bytes = sizeof(stim::scalarwave<T>);
  	size_t shared_bytes = stim::sharedMemPerBlock();									//calculate the maximum amount of shared memory available
  	size_t array_bytes = w_array.size() * wave_bytes;			//calculate the maximum number of bytes required for the planewave array
  	size_t max_batch = shared_bytes / wave_bytes;				//calculate number of plane waves that will fit into shared memory
  	size_t num_batches = w_array.size() / max_batch + 1;								//calculate the number of batches required to process all plane waves
  	size_t batch_bytes = min(w_array.size(), max_batch) * wave_bytes;				//initialize the batch size (in bytes) to the maximum batch required
  
  	stim::scalarwave<T>* dev_w;
  	HANDLE_ERROR(cudaMalloc(&dev_w, batch_bytes));										//allocate memory for a single batch of plane waves
  
  	int threads = stim::maxThreadsPerBlock();							//get the maximum number of threads per block for the CUDA device
  	dim3 blocks((unsigned)(N / threads + 1));										//calculate the optimal number of blocks	
  
  	size_t batch_size;																	//declare a variable to store the size of the current batch
  	size_t waves_processed = 0;															//initialize the number of waves processed to zero
  	while(waves_processed < w_array.size()){												//while there are still waves to be processed
  		batch_size = min<size_t>(max_batch, w_array.size() - waves_processed);			//process either a whole batch, or whatever is left
  		batch_bytes = batch_size * sizeof(stim::scalarwave<T>);
  		HANDLE_ERROR(cudaMemcpy(dev_w, &w_array[waves_processed], batch_bytes, cudaMemcpyHostToDevice));	//copy the plane waves into global memory
  		cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(dev_F, N, dev_x, dev_y, dev_z, dev_w, batch_size);	//call the kernel
  		waves_processed += batch_size;													//increment the counter indicating how many waves have been processed
  	}
  
  	cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost);			//copy the field from device memory
  
  	if(x != NULL) cudaFree(dev_x);														//free everything
  	if(y != NULL) cudaFree(dev_y);
  	if(z != NULL) cudaFree(dev_z);
  	cudaFree(dev_F);
  	cudaFree(dev_w);
  
  #endif
  }
  
  template<typename T>
  void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  	std::vector< stim::scalarwave<T> > w_array(1, w);
  	cpu_sum_scalarwaves(F, N, x, y, z, w_array);	
  }
  
  
  /// Sums a series of coherent plane waves at a specified point
  /// @param x is the x coordinate of the field point
  /// @param y is the y coordinate of the field point
  /// @param z is the z coordinate of the field point
  /// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  /// @param A is the list of amplitudes for each wave
  /// @param S is the list of propagation directions for each wave
  template<typename T>
  CUDA_CALLABLE stim::complex<T> sum_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
  	size_t N = W.size();												//get the number of plane wave samples
  	stim::complex<T> field(0, 0);										//initialize the field to zero (0)
  	stim::vec3<T> k;													//allocate space for the direction vector
  	for(size_t i = 0; i < N; i++){
  		field += W[i].pos(x, y, z);
  	}
  	return field;
  }
  
  }					//end namespace stim
  
  template <typename T>
  std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  {
      os<<p.str();
      return os;
  }
  
  #endif