Blame view

stim/optics/scalarwave.h 13.7 KB
8e4f8364   David Mayerich   started a new opt...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
  #ifndef STIM_SCALARWAVE_H
  #define STIM_SCALARWAVE_H
  
  
  #include <string>
  #include <sstream>
  #include <cmath>
  
  //#include "../math/vector.h"
  #include "../math/vec3.h"
  #include "../math/quaternion.h"
  #include "../math/constants.h"
  #include "../math/plane.h"
  #include "../math/complex.h"
  
  //CUDA
  #include "../cuda/cudatools/devices.h"
  #include "../cuda/cudatools/error.h"
  #include "../cuda/sharedmem.cuh"
  
  namespace stim{
  
  template<typename T>
  class scalarwave{
  
31262e83   David Mayerich   GPU implementatio...
26
  public:
8e4f8364   David Mayerich   started a new opt...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  
  	stim::vec3<T> k;							//k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda
  	stim::complex<T> E0;						//amplitude
  
  	/// Bend a plane wave via refraction, given that the new propagation direction is known
  	CUDA_CALLABLE scalarwave<T> bend(stim::vec3<T> kn) const{
  		return scalarwave<T>(kn.norm() * kmag(), E0);
  	}
  
  public:
  
  	///constructor: create a plane wave propagating along k
  	CUDA_CALLABLE scalarwave(vec3<T> kvec = stim::vec3<T>(0, 0, (T)stim::TAU), complex<T> E = 1){
  		k = kvec;
  		E0 = E;
  	}
  
  	CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex<T> E = 1){
  		k = vec3<T>(kx, ky, kz);
  		E0 = E;
  	}
  
  	///multiplication operator: scale E0
      CUDA_CALLABLE scalarwave<T> & operator* (const T & rhs){		
  		E0 = E0 * rhs;
  		return *this;
  	}
  
  	CUDA_CALLABLE T lambda() const{
  		return stim::TAU / k.len();
  	}
  
  	CUDA_CALLABLE T kmag() const{
  		return k.len();
  	}
  
9339fbad   David Mayerich   implementing mie ...
63
  	CUDA_CALLABLE complex<T> E(){
8e4f8364   David Mayerich   started a new opt...
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
  		return E0;
  	}
  
  	CUDA_CALLABLE vec3<T> kvec(){
  		return k;
  	}
  
  	/// calculate the value of the field produced by the plane wave given a three-dimensional position
  	CUDA_CALLABLE complex<T> pos(T x, T y, T z){
  		return pos( stim::vec3<T>(x, y, z) );
  	}
  
  	CUDA_CALLABLE complex<T> pos(vec3<T> p = vec3<T>(0, 0, 0)){
  		return E0 * exp(complex<T>(0, k.dot(p)));
  	}
  
  	//scales k based on a transition from material ni to material nt
  	CUDA_CALLABLE scalarwave<T> n(T ni, T nt){
  		return scalarwave<T>(k * (nt / ni), E0);
  	}
  
  	CUDA_CALLABLE scalarwave<T> refract(stim::vec3<T> kn) const{
  		return bend(kn);
  	}
  
  	/// Calculate the result of a plane wave hitting an interface between two refractive indices
  
  	/// @param P is a plane representing the position and orientation of the surface
  	/// @param n0 is the refractive index outside of the surface (in the direction of the normal)
  	/// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  	/// @param r is the reflected component of the plane wave
  	/// @param t is the transmitted component of the plane wave
  	void scatter(stim::plane<T> P, T n0, T n1, scalarwave<T> &r, scalarwave<T> &t){
  		scatter(P, n1/n0, r, t);
  	}
  
  	/// Calculate the scattering result when nr = n1/n0
  
  	/// @param P is a plane representing the position and orientation of the surface
  	/// @param r is the ration n1/n0
  	/// @param n1 is the refractive index inside the surface (in the direction away from the normal)
  	/// @param r is the reflected component of the plane wave
  	/// @param t is the transmitted component of the plane wave
  	void scatter(stim::plane<T> P, T nr, scalarwave<T> &r, scalarwave<T> &t){
  		/*
  		int facing = P.face(k);		//determine which direction the plane wave is coming in
  
  		if(facing == -1){		//if the wave hits the back of the plane, invert the plane and nr
  			P = P.flip();			//flip the plane
  			nr = 1/nr;				//invert the refractive index (now nr = n0/n1)
  		}
  
  		//use Snell's Law to calculate the transmitted angle
  		T cos_theta_i = k.norm().dot(-P.norm());				//compute the cosine of theta_i
  		T theta_i = acos(cos_theta_i);							//compute theta_i
  		T sin_theta_t = (1/nr) * sin(theta_i);						//compute the sine of theta_t using Snell's law
  		T theta_t = asin(sin_theta_t);							//compute the cosine of theta_t
  
  		bool tir = false;						//flag for total internal reflection
  		if(theta_t != theta_t){
  			tir = true;
  			theta_t = stim::PI / (T)2;
  		}
  
  		//handle the degenerate case where theta_i is 0 (the plane wave hits head-on)
  		if(theta_i == 0){
  			T rp = (1 - nr) / (1 + nr);		//compute the Fresnel coefficients
  			T tp = 2 / (1 + nr);
  			vec3<T> kr = -k;
  			vec3<T> kt = k * nr;			//set the k vectors for theta_i = 0
  			vec3< complex<T> > Er = E0 * rp;		//compute the E vectors
  			vec3< complex<T> > Et = E0 * tp;
  			T phase_t = P.p().dot(k - kt);	//compute the phase offset
  			T phase_r = P.p().dot(k - kr);
  
  			//create the plane waves
  			r = planewave<T>(kr, Er, phase_r);
  			t = planewave<T>(kt, Et, phase_t);
  			return;
  		}
  
  
  		//compute the Fresnel coefficients
  		T rp, rs, tp, ts;
  		rp = tan(theta_t - theta_i) / tan(theta_t + theta_i);
  		rs = sin(theta_t - theta_i) / sin(theta_t + theta_i);
  		
  		if(tir){
  			tp = ts = 0;
  		}
  		else{
  			tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) );
  			ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i);
  		}
  
  		//compute the coordinate space for the plane of incidence
  		vec3<T> z_hat = -P.norm();
  		vec3<T> y_hat = P.parallel(k).norm();
  		vec3<T> x_hat = y_hat.cross(z_hat).norm();
  
  		//compute the k vectors for r and t
  		vec3<T> kr, kt;
  		kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag();
  		kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr;
  
  		//compute the magnitude of the p- and s-polarized components of the incident E vector
  		complex<T> Ei_s = E0.dot(x_hat);
  		int sgn = E0.dot(y_hat).sgn();
  		vec3< complex<T> > cx_hat = x_hat;
  		complex<T> Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn;
  		//compute the magnitude of the p- and s-polarized components of the reflected E vector
  		complex<T> Er_s = Ei_s * rs;
  		complex<T> Er_p = Ei_p * rp;
  		//compute the magnitude of the p- and s-polarized components of the transmitted E vector
  		complex<T> Et_s = Ei_s * ts;
  		complex<T> Et_p = Ei_p * tp;
  
  		//compute the reflected E vector
  		vec3< complex<T> > Er = vec3< complex<T> >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s;
  		//compute the transmitted E vector
  		vec3< complex<T> > Et = vec3< complex<T> >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s;
  
  		T phase_t = P.p().dot(k - kt);
  		T phase_r = P.p().dot(k - kr);
  
  		//create the plane waves
  		r.k = kr;
  		r.E0 = Er * exp( complex<T>(0, phase_r) );
  
  		t.k = kt;
  		t.E0 = Et * exp( complex<T>(0, phase_t) );
  		*/
  	}
  
  	std::string str()
  	{
  		std::stringstream ss;
  		ss<<"Plane Wave:"<<std::endl;
  		ss<<"	"<<E0<<" e^i ( "<<k<<" . r )";
  		return ss.str();
  	}
  };					//end planewave class
  
  
  /// CUDA kernel for computing the field produced by a batch of plane waves at an array of locations.
  /// The summed contribution of all waves in the batch is ADDED to the existing value of each F[i].
  ///
  /// @param F is the field array (N entries) that the batch contribution is accumulated into
  /// @param N is the number of sample positions
  /// @param x is the array of x coordinates (N entries); NULL means x = 0 for every point
  /// @param y is the array of y coordinates (N entries); NULL means y = 0 for every point
  /// @param z is the array of z coordinates (N entries); NULL means z = 0 for every point
  /// @param W is the array of plane waves in this batch
  /// @param n_waves is the number of waves in W; the launch has to provide at least
  ///		n_waves * sizeof(stim::scalarwave<T>) bytes of dynamic shared memory
  template<typename T>
  __global__ void cuda_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t n_waves){
  	extern __shared__ stim::scalarwave<T> shared_W[];		//declare the list of waves in shared memory

  	stim::cuda::threadedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x);	//copy the plane waves into shared memory for faster access
  	__syncthreads();															//every thread must reach this barrier, so it comes before the bounds check below

  	//widen before multiplying so the index cannot wrap in 32-bit arithmetic for very large arrays
  	size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;		//get the index into the array
  	if(i >= N) return;												//exit if this thread is outside the array

  	//a NULL coordinate array means that coordinate is zero for every point
  	const T px = (x == NULL) ? (T)0 : x[i];
  	const T py = (y == NULL) ? (T)0 : y[i];
  	const T pz = (z == NULL) ? (T)0 : z[i];

  	stim::complex<T> f = 0;											//create a register to store the result
  	for(size_t w = 0; w < n_waves; w++)
  		f += shared_W[w].pos(px, py, pz);							//evaluate the plane wave
  	F[i] += f;														//accumulate the result into device memory
  }
  
  /// Evaluate a single scalar wave at several points, where all arrays are on the GPU.
  /// The wave's field is ADDED to the existing contents of F (see cuda_scalarwave).
  ///
  /// @param F is the device array (N entries) that the field is accumulated into
  /// @param N is the number of sample positions
  /// @param x is the device array of x coordinates, or NULL for x = 0
  /// @param y is the device array of y coordinates, or NULL for y = 0
  /// @param z is the device array of z coordinates, or NULL for z = 0
  /// @param w is the plane wave to evaluate (a host-side value)
  template<typename T>
  void gpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){

  	//the kernel expects a device array of waves plus a count, so stage the single wave on the device
  	const size_t wave_bytes = sizeof(stim::scalarwave<T>);
  	stim::scalarwave<T>* dev_w = NULL;
  	HANDLE_ERROR(cudaMalloc(&dev_w, wave_bytes));
  	HANDLE_ERROR(cudaMemcpy(dev_w, &w, wave_bytes, cudaMemcpyHostToDevice));

  	int threads = stim::maxThreadsPerBlock();			//get the maximum number of threads per block for the CUDA device
  	dim3 blocks((unsigned)(N / threads + 1));			//enough blocks to cover all N points

  	//the third launch parameter reserves the dynamic shared memory that the kernel copies the wave into
  	cuda_scalarwave<T><<< blocks, threads, wave_bytes >>>(F, N, x, y, z, dev_w, (size_t)1);
  	HANDLE_ERROR(cudaGetLastError());					//report launch failures (e.g. an invalid configuration)

  	HANDLE_ERROR(cudaFree(dev_w));
  }
  
9339fbad   David Mayerich   implementing mie ...
238
239
240
241
242
  /// Evaluate a list of scalar waves at several points, where all arrays are on the GPU.
  /// The waves are processed in batches sized to fit into shared memory, and the summed field
  /// is ADDED to the existing contents of F (see cuda_scalarwave).
  ///
  /// @param F is the device array (N entries) that the field is accumulated into
  /// @param N is the number of sample positions
  /// @param x is the device array of x coordinates, or NULL for x = 0
  /// @param y is the device array of y coordinates, or NULL for y = 0
  /// @param z is the device array of z coordinates, or NULL for z = 0
  /// @param W is the device array of plane waves
  /// @param nW is the number of waves in W
  template<typename T>
  void gpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T>* W, size_t nW){

  	if(N == 0 || nW == 0) return;								//nothing to evaluate, F is left unchanged

  	const size_t wave_bytes = sizeof(stim::scalarwave<T>);
  	const size_t shared_bytes = stim::sharedMemPerBlock();		//calculate the maximum amount of shared memory available
  	const size_t max_batch = shared_bytes / wave_bytes;			//calculate number of plane waves that will fit into shared memory
  	const size_t first_batch = (nW < max_batch) ? nW : max_batch;	//no later batch is larger than the first one

  	stim::scalarwave<T>* batch_W = NULL;
  	HANDLE_ERROR(cudaMalloc(&batch_W, first_batch * wave_bytes));	//allocate memory for a single batch of plane waves

  	int threads = stim::maxThreadsPerBlock();					//get the maximum number of threads per block for the CUDA device
  	dim3 blocks((unsigned)(N / threads + 1));					//enough blocks to cover all N points

  	size_t waves_processed = 0;									//initialize the number of waves processed to zero
  	while(waves_processed < nW){								//while there are still waves to be processed
  		const size_t remaining = nW - waves_processed;
  		const size_t batch_size = (remaining < max_batch) ? remaining : max_batch;	//process either a whole batch, or whatever is left
  		const size_t batch_bytes = batch_size * wave_bytes;
  		HANDLE_ERROR(cudaMemcpy(batch_W, W + waves_processed, batch_bytes, cudaMemcpyDeviceToDevice));	//copy the current batch into the staging buffer
  		cuda_scalarwave<T><<< blocks, threads, batch_bytes >>>(F, N, x, y, z, batch_W, batch_size);	//call the kernel
  		HANDLE_ERROR(cudaGetLastError());						//report launch failures
  		waves_processed += batch_size;							//increment the counter indicating how many waves have been processed
  	}
  	HANDLE_ERROR(cudaFree(batch_W));
  }
  
8e4f8364   David Mayerich   started a new opt...
264
265
266
267
268
269
270
271
272
273
  /// Sums a series of coherent plane waves at a specified point
  /// @param field is the output array of field values corresponding to each input point
  /// @param x is an array of x coordinates for the field point
  /// @param y is an array of y coordinates for the field point
  /// @param z is an array of z coordinates for the field point
  /// @param N is the number of points in the input and output arrays
  /// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  /// @param A is the list of amplitudes for each wave
  /// @param S is the list of propagation directions for each wave
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
274
275
276
  void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave<T> > W){
  	size_t S = W.size();											//store the number of waves
  #ifdef __CUDACC__
8e4f8364   David Mayerich   started a new opt...
277
278
  	stim::complex<T>* dev_F;										//allocate space for the field
  	cudaMalloc(&dev_F, N * sizeof(stim::complex<T>));
9339fbad   David Mayerich   implementing mie ...
279
280
  	cudaMemcpy(dev_F, F, N * sizeof(stim::complex<T>), cudaMemcpyHostToDevice);
  	//cudaMemset(dev_F, 0, N * sizeof(stim::complex<T>));				//set the field to zero (necessary because a sum is used)
8e4f8364   David Mayerich   started a new opt...
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
  
  	T* dev_x = NULL;												//allocate space and copy the X coordinate (if specified)
  	if(x != NULL){
  		HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T)));
  		HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice));
  	}
  
  	T* dev_y = NULL;												//allocate space and copy the Y coordinate (if specified)
  	if(y != NULL){
  		HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T)));
  		HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice));
  	}
  
  	T* dev_z = NULL;												//allocate space and copy the Z coordinate (if specified)
  	if(z != NULL){
  		HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T)));
  		HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice));
  	}
  
9339fbad   David Mayerich   implementing mie ...
300
301
302
  	stim::scalarwave<T>* dev_W;
  	HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave<T>) * W.size()) );
  	HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave<T>) * W.size(), cudaMemcpyHostToDevice) );
8e4f8364   David Mayerich   started a new opt...
303
  
9339fbad   David Mayerich   implementing mie ...
304
  	gpu_scalarwaves(dev_F, N, dev_x, dev_y, dev_z, dev_W, W.size());
8e4f8364   David Mayerich   started a new opt...
305
306
307
308
309
310
311
  
  	cudaMemcpy(F, dev_F, N * sizeof(stim::complex<T>), cudaMemcpyDeviceToHost);			//copy the field from device memory
  
  	if(x != NULL) cudaFree(dev_x);														//free everything
  	if(y != NULL) cudaFree(dev_y);
  	if(z != NULL) cudaFree(dev_z);
  	cudaFree(dev_F);
9339fbad   David Mayerich   implementing mie ...
312
313
314
315
316
317
318
  #else
  	memset(F, 0, N * sizeof(stim::complex<T>));
  	T px, py, pz;
  	for(size_t i = 0; i < N; i++){										// for each element in the array
  		(x == NULL) ? px = 0 : px = x[i];								// test for NULL values
  		(y == NULL) ? py = 0 : py = y[i];
  		(z == NULL) ? pz = 0 : pz = z[i];
8e4f8364   David Mayerich   started a new opt...
319
  
9339fbad   David Mayerich   implementing mie ...
320
321
322
323
  		for(size_t s = 0; s < S; s++){
  			F[i] += w_array[s].pos(px, py, pz);						//sum all plane waves at this point
  		}
  	}
8e4f8364   David Mayerich   started a new opt...
324
325
326
327
328
329
  #endif
  }
  
  /// Evaluates a single plane wave at an array of points, where all arrays are in host memory.
  /// Wraps the wave in a one-element list and forwards to the vector overload of cpu_scalarwaves.
  /// @param F is the output array of field values corresponding to each input point
  /// @param N is the number of points in the input and output arrays
  /// @param x is an array of x coordinates (or NULL)
  /// @param y is an array of y coordinates (or NULL)
  /// @param z is an array of z coordinates (or NULL)
  /// @param w is the plane wave to evaluate
  template<typename T>
  void cpu_scalarwave(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  	std::vector< stim::scalarwave<T> > w_array(1, w);		//list containing just this wave
  	cpu_scalarwaves(F, N, x, y, z, w_array);
  }
  
  /// Single-wave overload of cpu_scalarwaves: evaluates one plane wave at an array of points.
  /// Wraps the wave in a one-element list and forwards to the vector overload.
  /// @param F is the output array of field values corresponding to each input point
  /// @param N is the number of points in the input and output arrays
  /// @param x is an array of x coordinates (or NULL)
  /// @param y is an array of y coordinates (or NULL)
  /// @param z is an array of z coordinates (or NULL)
  /// @param w is the plane wave to evaluate
  template<typename T>
  void cpu_scalarwaves(stim::complex<T>* F, size_t N, T* x, T* y, T* z, stim::scalarwave<T> w){
  	std::vector< stim::scalarwave<T> > w_array(1, w);		//list containing just this wave
  	cpu_scalarwaves(F, N, x, y, z, w_array);
  }
  
  
  /// Sums a series of coherent plane waves at a specified point
  /// @param x is the x coordinate of the field point
  /// @param y is the y coordinate of the field point
  /// @param z is the z coordinate of the field point
  /// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength)
  /// @param A is the list of amplitudes for each wave
  /// @param S is the list of propagation directions for each wave
  template<typename T>
9339fbad   David Mayerich   implementing mie ...
348
  CUDA_CALLABLE stim::complex<T> cpu_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave<T> > W){
8e4f8364   David Mayerich   started a new opt...
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
  	size_t N = W.size();												//get the number of plane wave samples
  	stim::complex<T> field(0, 0);										//initialize the field to zero (0)
  	stim::vec3<T> k;													//allocate space for the direction vector
  	for(size_t i = 0; i < N; i++){
  		field += W[i].pos(x, y, z);
  	}
  	return field;
  }
  
  }					//end namespace stim
  
  /// Stream insertion for scalar waves: writes the text returned by scalarwave::str() to the stream.
  /// @param os is the destination stream
  /// @param p is the wave to print
  /// @return the destination stream, to allow chaining
  template <typename T>
  std::ostream& operator<<(std::ostream& os, stim::scalarwave<T> p)
  {
      return os << p.str();
  }
  
  #endif