### Blame view

stim/optics/scalarwave.h 13.7 KB
 ```1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25``` `````` #ifndef STIM_SCALARWAVE_H #define STIM_SCALARWAVE_H #include #include #include //#include "../math/vector.h" #include "../math/vec3.h" #include "../math/quaternion.h" #include "../math/constants.h" #include "../math/plane.h" #include "../math/complex.h" //CUDA #include "../cuda/cudatools/devices.h" #include "../cuda/cudatools/error.h" #include "../cuda/sharedmem.cuh" namespace stim{ template class scalarwave{ `````` `26` `````` public: `````` ```27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62``` `````` stim::vec3 k; //k-vector, pointed in propagation direction with magnitude |k| = tau / lambda = 2pi / lambda stim::complex E0; //amplitude /// Bend a plane wave via refraction, given that the new propagation direction is known CUDA_CALLABLE scalarwave bend(stim::vec3 kn) const{ return scalarwave(kn.norm() * kmag(), E0); } public: ///constructor: create a plane wave propagating along k CUDA_CALLABLE scalarwave(vec3 kvec = stim::vec3(0, 0, (T)stim::TAU), complex E = 1){ k = kvec; E0 = E; } CUDA_CALLABLE scalarwave(T kx, T ky, T kz, complex E = 1){ k = vec3(kx, ky, kz); E0 = E; } ///multiplication operator: scale E0 CUDA_CALLABLE scalarwave & operator* (const T & rhs){ E0 = E0 * rhs; return *this; } CUDA_CALLABLE T lambda() const{ return stim::TAU / k.len(); } CUDA_CALLABLE T kmag() const{ return k.len(); } `````` `63` `````` CUDA_CALLABLE complex E(){ `````` ```64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212``` `````` return E0; } CUDA_CALLABLE vec3 kvec(){ return k; } /// calculate the value of the field produced by the plane wave given a three-dimensional position CUDA_CALLABLE complex pos(T x, T y, T z){ return pos( stim::vec3(x, y, z) ); } CUDA_CALLABLE complex pos(vec3 p = vec3(0, 0, 0)){ return E0 * exp(complex(0, k.dot(p))); } //scales k based on a transition from material ni to material nt CUDA_CALLABLE scalarwave n(T ni, T nt){ return scalarwave(k * (nt / ni), E0); } CUDA_CALLABLE scalarwave refract(stim::vec3 kn) const{ return bend(kn); } /// Calculate the result of a plane wave hitting an interface between two refractive indices /// @param P is a plane representing the position and orientation of the surface /// @param n0 is the refractive index outside of the surface (in the direction of the normal) /// @param n1 is the refractive index inside the surface (in the direction away from the normal) /// @param r is the reflected component of the plane wave /// @param t is the transmitted component of the plane wave void scatter(stim::plane P, T n0, T n1, scalarwave &r, scalarwave &t){ scatter(P, n1/n0, r, t); } /// Calculate the scattering result when nr = n1/n0 /// @param P is a plane representing the position and orientation of the surface /// @param r is the ration n1/n0 /// @param n1 is the refractive index inside the surface (in the direction away from the normal) /// @param r is the reflected component of the plane wave /// @param t is the transmitted component of the plane wave void scatter(stim::plane P, T nr, scalarwave &r, scalarwave &t){ /* int facing = P.face(k); //determine which direction the plane wave is coming in if(facing == -1){ //if the wave hits the back of the plane, invert the plane and nr P = P.flip(); //flip the plane nr = 1/nr; //invert the refractive index (now nr = n0/n1) } //use Snell's Law to calculate the transmitted angle T cos_theta_i = k.norm().dot(-P.norm()); //compute the cosine of theta_i T theta_i = acos(cos_theta_i); //compute theta_i T sin_theta_t = (1/nr) * sin(theta_i); //compute the sine of theta_t using Snell's law T theta_t = asin(sin_theta_t); //compute the cosine of theta_t bool tir = false; //flag for total internal reflection if(theta_t != theta_t){ tir = true; theta_t = stim::PI / (T)2; } //handle the degenerate case where theta_i is 0 (the plane wave hits head-on) if(theta_i == 0){ T rp = (1 - nr) / (1 + nr); //compute the Fresnel coefficients T tp = 2 / (1 + nr); vec3 kr = -k; vec3 kt = k * nr; //set the k vectors for theta_i = 0 vec3< complex > Er = E0 * rp; //compute the E vectors vec3< complex > Et = E0 * tp; T phase_t = P.p().dot(k - kt); //compute the phase offset T phase_r = P.p().dot(k - kr); //create the plane waves r = planewave(kr, Er, phase_r); t = planewave(kt, Et, phase_t); return; } //compute the Fresnel coefficients T rp, rs, tp, ts; rp = tan(theta_t - theta_i) / tan(theta_t + theta_i); rs = sin(theta_t - theta_i) / sin(theta_t + theta_i); if(tir){ tp = ts = 0; } else{ tp = ( 2 * sin(theta_t) * cos(theta_i) ) / ( sin(theta_t + theta_i) * cos(theta_t - theta_i) ); ts = ( 2 * sin(theta_t) * cos(theta_i) ) / sin(theta_t + theta_i); } //compute the coordinate space for the plane of incidence vec3 z_hat = -P.norm(); vec3 y_hat = P.parallel(k).norm(); vec3 x_hat = y_hat.cross(z_hat).norm(); //compute the k vectors for r and t vec3 kr, kt; kr = ( y_hat * sin(theta_i) - z_hat * cos(theta_i) ) * kmag(); kt = ( y_hat * sin(theta_t) + z_hat * cos(theta_t) ) * kmag() * nr; //compute the magnitude of the p- and s-polarized components of the incident E vector complex Ei_s = E0.dot(x_hat); int sgn = E0.dot(y_hat).sgn(); vec3< complex > cx_hat = x_hat; complex Ei_p = ( E0 - cx_hat * Ei_s ).len() * sgn; //compute the magnitude of the p- and s-polarized components of the reflected E vector complex Er_s = Ei_s * rs; complex Er_p = Ei_p * rp; //compute the magnitude of the p- and s-polarized components of the transmitted E vector complex Et_s = Ei_s * ts; complex Et_p = Ei_p * tp; //compute the reflected E vector vec3< complex > Er = vec3< complex >(y_hat * cos(theta_i) + z_hat * sin(theta_i)) * Er_p + cx_hat * Er_s; //compute the transmitted E vector vec3< complex > Et = vec3< complex >(y_hat * cos(theta_t) - z_hat * sin(theta_t)) * Et_p + cx_hat * Et_s; T phase_t = P.p().dot(k - kt); T phase_r = P.p().dot(k - kr); //create the plane waves r.k = kr; r.E0 = Er * exp( complex(0, phase_r) ); t.k = kt; t.E0 = Et * exp( complex(0, phase_t) ); */ } std::string str() { std::stringstream ss; ss<<"Plane Wave:"< __global__ void cuda_scalarwave(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave* W, size_t n_waves){ extern __shared__ stim::scalarwave shared_W[]; //declare the list of waves in shared memory `````` `213` `````` stim::cuda::threadedMemcpy(shared_W, W, n_waves, threadIdx.x, blockDim.x); //copy the plane waves into shared memory for faster access `````` ```214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237``` `````` __syncthreads(); //synchronize threads to insure all data is copied size_t i = blockIdx.x * blockDim.x + threadIdx.x; //get the index into the array if(i >= N) return; //exit if this thread is outside the array T px, py, pz; (x == NULL) ? px = 0 : px = x[i]; // test for NULL values and set positions (y == NULL) ? py = 0 : py = y[i]; (z == NULL) ? pz = 0 : pz = z[i]; stim::complex f = 0; //create a register to store the result for(size_t w = 0; w < n_waves; w++) f += shared_W[w].pos(px, py, pz); //evaluate the plane wave F[i] += f; //copy the result to device memory } /// evaluate a scalar wave at several points, where all arrays are on the GPU template void gpu_scalarwave(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave w){ int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device dim3 blocks(N / threads + 1); //calculate the optimal number of blocks cuda_scalarwave<<< blocks, threads >>>(F, N, x, y, z, w); //call the kernel } `````` ```238 239 240 241 242``` `````` template void gpu_scalarwaves(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave* W, size_t nW){ size_t wave_bytes = sizeof(stim::scalarwave); size_t shared_bytes = stim::sharedMemPerBlock(); //calculate the maximum amount of shared memory available `````` `243` `````` size_t max_batch = shared_bytes / wave_bytes; //calculate number of plane waves that will fit into shared memory `````` ```244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263``` `````` size_t batch_bytes = min(nW, max_batch) * wave_bytes; //initialize the batch size (in bytes) to the maximum batch required stim::scalarwave* batch_W; HANDLE_ERROR(cudaMalloc(&batch_W, batch_bytes)); //allocate memory for a single batch of plane waves int threads = stim::maxThreadsPerBlock(); //get the maximum number of threads per block for the CUDA device dim3 blocks((unsigned)(N / threads + 1)); //calculate the optimal number of blocks size_t batch_size; //declare a variable to store the size of the current batch size_t waves_processed = 0; //initialize the number of waves processed to zero while(waves_processed < nW){ //while there are still waves to be processed batch_size = min(max_batch, nW - waves_processed); //process either a whole batch, or whatever is left batch_bytes = batch_size * sizeof(stim::scalarwave); HANDLE_ERROR(cudaMemcpy(batch_W, W + waves_processed, batch_bytes, cudaMemcpyDeviceToDevice)); //copy the plane waves into global memory cuda_scalarwave<<< blocks, threads, batch_bytes >>>(F, N, x, y, z, batch_W, batch_size); //call the kernel waves_processed += batch_size; //increment the counter indicating how many waves have been processed } cudaFree(batch_W); } `````` ```264 265 266 267 268 269 270 271 272 273``` `````` /// Sums a series of coherent plane waves at a specified point /// @param field is the output array of field values corresponding to each input point /// @param x is an array of x coordinates for the field point /// @param y is an array of y coordinates for the field point /// @param z is an array of z coordinates for the field point /// @param N is the number of points in the input and output arrays /// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength) /// @param A is the list of amplitudes for each wave /// @param S is the list of propagation directions for each wave template `````` ```274 275 276``` `````` void cpu_scalarwaves(stim::complex* F, size_t N, T* x, T* y, T* z, std::vector< stim::scalarwave > W){ size_t S = W.size(); //store the number of waves #ifdef __CUDACC__ `````` ```277 278``` `````` stim::complex* dev_F; //allocate space for the field cudaMalloc(&dev_F, N * sizeof(stim::complex)); `````` ```279 280``` `````` cudaMemcpy(dev_F, F, N * sizeof(stim::complex), cudaMemcpyHostToDevice); //cudaMemset(dev_F, 0, N * sizeof(stim::complex)); //set the field to zero (necessary because a sum is used) `````` ```281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299``` `````` T* dev_x = NULL; //allocate space and copy the X coordinate (if specified) if(x != NULL){ HANDLE_ERROR(cudaMalloc(&dev_x, N * sizeof(T))); HANDLE_ERROR(cudaMemcpy(dev_x, x, N * sizeof(T), cudaMemcpyHostToDevice)); } T* dev_y = NULL; //allocate space and copy the Y coordinate (if specified) if(y != NULL){ HANDLE_ERROR(cudaMalloc(&dev_y, N * sizeof(T))); HANDLE_ERROR(cudaMemcpy(dev_y, y, N * sizeof(T), cudaMemcpyHostToDevice)); } T* dev_z = NULL; //allocate space and copy the Z coordinate (if specified) if(z != NULL){ HANDLE_ERROR(cudaMalloc(&dev_z, N * sizeof(T))); HANDLE_ERROR(cudaMemcpy(dev_z, z, N * sizeof(T), cudaMemcpyHostToDevice)); } `````` ```300 301 302``` `````` stim::scalarwave* dev_W; HANDLE_ERROR( cudaMalloc(&dev_W, sizeof(stim::scalarwave) * W.size()) ); HANDLE_ERROR( cudaMemcpy(dev_W, &W[0], sizeof(stim::scalarwave) * W.size(), cudaMemcpyHostToDevice) ); `````` `303` `````` `````` `304` `````` gpu_scalarwaves(dev_F, N, dev_x, dev_y, dev_z, dev_W, W.size()); `````` ```305 306 307 308 309 310 311``` `````` cudaMemcpy(F, dev_F, N * sizeof(stim::complex), cudaMemcpyDeviceToHost); //copy the field from device memory if(x != NULL) cudaFree(dev_x); //free everything if(y != NULL) cudaFree(dev_y); if(z != NULL) cudaFree(dev_z); cudaFree(dev_F); `````` ```312 313 314 315 316 317 318``` `````` #else memset(F, 0, N * sizeof(stim::complex)); T px, py, pz; for(size_t i = 0; i < N; i++){ // for each element in the array (x == NULL) ? px = 0 : px = x[i]; // test for NULL values (y == NULL) ? py = 0 : py = y[i]; (z == NULL) ? pz = 0 : pz = z[i]; `````` `319` `````` `````` ```320 321 322 323``` `````` for(size_t s = 0; s < S; s++){ F[i] += w_array[s].pos(px, py, pz); //sum all plane waves at this point } } `````` ```324 325 326 327 328 329``` `````` #endif } template void cpu_scalarwave(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave w){ std::vector< stim::scalarwave > w_array(1, w); `````` `330` `````` cpu_scalarwaves(F, N, x, y, z, w_array); `````` ```331 332 333 334 335 336``` `````` } template void cpu_scalarwaves(stim::complex* F, size_t N, T* x, T* y, T* z, stim::scalarwave w){ std::vector< stim::scalarwave > w_array(1, w); cpu_scalarwaves(F, N, x, y, z, w_array); `````` ```337 338 339 340 341 342 343 344 345 346 347``` `````` } /// Sums a series of coherent plane waves at a specified point /// @param x is the x coordinate of the field point /// @param y is the y coordinate of the field point /// @param z is the z coordinate of the field point /// @param lambda is the wavelength (all coherent waves are assumed to have the same wavelength) /// @param A is the list of amplitudes for each wave /// @param S is the list of propagation directions for each wave template `````` `348` `````` CUDA_CALLABLE stim::complex cpu_scalarwaves(T x, T y, T z, std::vector< stim::scalarwave > W){ `````` ```349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367``` `````` size_t N = W.size(); //get the number of plane wave samples stim::complex field(0, 0); //initialize the field to zero (0) stim::vec3 k; //allocate space for the direction vector for(size_t i = 0; i < N; i++){ field += W[i].pos(x, y, z); } return field; } } //end namespace stim template std::ostream& operator<<(std::ostream& os, stim::scalarwave p) { os<