### Blame view

stim/optics_old/efield.cuh 11.5 KB
 ```1 2 3``` `````` #ifndef RTS_EFIELD #define RTS_EFIELD `````` `4` `````` #include "../math/complex.h" `````` `5` `````` #include "../math/realfield.cuh" `````` `6` `````` #include "../visualization/colormap.h" `````` ```7 8``` `````` #include "../optics/planewave.h" #include "../cuda/devices.h" `````` `9` `````` #include "../optics/beam.h" `````` `10` `````` #include "../math/rect.h" `````` `11` `````` `````` `12` `````` `````` `13` `````` namespace stim{ `````` `14` `````` `````` ```15 16``` `````` template __global__ void gpu_planewave2efield(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, `````` `17` `````` planewave w, rect q) `````` ```18 19 20 21 22 23 24 25 26 27 28 29 30 31``` `````` { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //get the current position vec p = q( (T)iu/(T)r0, (T)iv/(T)r1 ); vec r(p[0], p[1], p[2]); `````` `32` `````` complex x( 0.0f, w.k.dot(r) ); `````` ```33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65``` `````` if(Y == NULL) //if this is a scalar simulation X[i] += w.E0.len() * exp(x); //use the vector magnitude as the plane wave amplitude else { X[i] += w.E0[0] * exp(x); Y[i] += w.E0[1] * exp(x); Z[i] += w.E0[2] * exp(x); } } template __global__ void gpu_efield_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; if(Y == NULL) //if this is a scalar simulation M[i] = X[i].abs(); //compute the magnitude of the X component else { M[i] = rts::vec(X[i].abs(), Y[i].abs(), Z[i].abs()).len(); //M[i] = Z[i].abs(); } } template `````` ```66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86``` `````` __global__ void gpu_efield_real_magnitude(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* M) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; if(Y == NULL) //if this is a scalar simulation M[i] = X[i].abs(); //compute the magnitude of the X component else { M[i] = rts::vec(X[i].real(), Y[i].real(), Z[i].real()).len(); //M[i] = Z[i].abs(); } } template `````` ```87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102``` `````` __global__ void gpu_efield_polarization(complex* X, complex* Y, complex* Z, unsigned int r0, unsigned int r1, T* Px, T* Py, T* Pz) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //compute the field polarization Px[i] = X[i].abs(); Py[i] = Y[i].abs(); `````` `103` `````` Pz[i] = Z[i].abs(); `````` `104` `````` `````` ```105 106 107 108 109 110 111 112 113``` `````` } /* This function computes the sum of two complex fields and stores the result in *dest */ template __global__ void gpu_efield_sum(complex* dest, complex* src, unsigned int r0, unsigned int r1) { int iu = blockIdx.x * blockDim.x + threadIdx.x; int iv = blockIdx.y * blockDim.y + threadIdx.y; `````` `114` `````` `````` ```115 116 117 118 119 120 121 122``` `````` //make sure that the thread indices are in-bounds if(iu >= r0 || iv >= r1) return; //compute the index into the field int i = iv*r0 + iu; //sum the fields dest[i] += src[i]; `````` ```123 124``` `````` } `````` ```125 126 127``` `````` /* This class implements a discrete representation of an electromagnetic field in 2D. The majority of this representation is done on the GPU. */ `````` `128` `````` template `````` ```129 130``` `````` class efield { `````` `131` `````` protected: `````` `132` `````` `````` `133` `````` bool vector; `````` ```134 135``` `````` //gpu pointer to the field data `````` ```136 137 138``` `````` rts::complex* X; rts::complex* Y; rts::complex* Z; `````` ```139 140 141 142``` `````` //resolution of the discrete field unsigned int R[2]; `````` `143` `````` //shape of the 2D field `````` `144` `````` rect pos; `````` `145` `````` `````` `146` `````` void from_planewave(planewave p) `````` ```147 148 149 150 151``` `````` { unsigned int SQRT_BLOCK = 16; //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); `````` `152` `````` `````` `153` `````` `````` `154` `````` gpu_planewave2efield <<>> (X, Y, Z, R[0], R[1], p, pos); `````` ```155 156``` `````` } `````` `157` `````` void init(unsigned int res0, unsigned int res1, bool _vector) `````` `158` `````` { `````` `159` `````` vector = _vector; //initialize field type `````` ```160 161``` `````` X = Y = Z = NULL; //initialize all pointers to NULL `````` ```162 163 164 165``` `````` R[0] = res0; R[1] = res1; //allocate memory on the gpu `````` ```166 167``` `````` cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); `````` `168` `````` `````` `169` `````` if(vector) `````` `170` `````` { `````` ```171 172``` `````` cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); `````` `173` `````` `````` ```174 175``` `````` cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); `````` ```176 177 178``` `````` } } `````` ```179 180 181 182 183 184 185``` `````` void destroy() { if(X != NULL) cudaFree(X); if(Y != NULL) cudaFree(Y); if(Z != NULL) cudaFree(Z); } `````` `186` `````` void shallowcpy(const rts::efield & src) `````` ```187 188 189 190 191 192``` `````` { vector = src.vector; R[0] = src.R[0]; R[1] = src.R[1]; } `````` `193` `````` void deepcpy(const rts::efield & src) `````` ```194 195 196 197 198 199 200``` `````` { //perform a shallow copy shallowcpy(src); //allocate memory on the gpu if(src.X != NULL) { `````` ```201 202``` `````` cudaMalloc(&X, sizeof(rts::complex) * R[0] * R[1]); cudaMemcpy(X, src.X, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); `````` ```203 204 205``` `````` } if(src.Y != NULL) { `````` ```206 207``` `````` cudaMalloc(&Y, sizeof(rts::complex) * R[0] * R[1]); cudaMemcpy(Y, src.Y, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); `````` ```208 209 210``` `````` } if(src.Z != NULL) { `````` ```211 212``` `````` cudaMalloc(&Z, sizeof(rts::complex) * R[0] * R[1]); cudaMemcpy(Z, src.Z, sizeof(rts::complex) * R[0] * R[1], cudaMemcpyDeviceToDevice); `````` ```213 214 215 216 217 218 219``` `````` } } public: efield(unsigned int res0, unsigned int res1, bool _vector = true) { init(res0, res1, _vector); `````` `220` `````` //pos = rts::rect(rts::vec(-10, 0, -10), rts::vec(-10, 0, 10), rts::vec(10, 0, 10)); `````` ```221 222``` `````` } `````` ```223 224 225``` `````` //destructor ~efield() { `````` ```226 227 228 229 230 231``` `````` destroy(); } ///Clear the field - set all points to zero void clear() { `````` `232` `````` cudaMemset(X, 0, sizeof(rts::complex) * R[0] * R[1]); `````` ```233 234 235``` `````` if(vector) { `````` ```236 237``` `````` cudaMemset(Y, 0, sizeof(rts::complex) * R[0] * R[1]); cudaMemset(Z, 0, sizeof(rts::complex) * R[0] * R[1]); `````` `238` `````` } `````` ```239 240``` `````` } `````` `241` `````` void position(rect _p) `````` ```242 243 244 245``` `````` { pos = _p; } `````` ```246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265``` `````` //access functions complex* x(){ return X; } complex* y(){ return Y; } complex* z(){ return Z; } unsigned int Ru(){ return R[0]; } unsigned int Rv(){ return R[1]; } rect p(){ return pos; } `````` ```266 267 268 269 270 271 272 273 274 275 276``` `````` std::string str() { stringstream ss; ss< & operator= (const efield & rhs) `````` ```279 280 281 282 283 284``` `````` { destroy(); //destroy any previous information about this field deepcpy(rhs); //create a deep copy return *this; //return the current object } `````` `285` `````` //assignment operator: build an electric field from a plane wave `````` `286` `````` efield & operator= (const planewave & rhs) `````` ```287 288 289 290``` `````` { clear(); //clear any previous field data from_planewave(rhs); //create a field from the planewave `````` `291` `````` return *this; //return the current object `````` ```292 293``` `````` } `````` `294` `````` //assignment operator: add an existing electric field `````` `295` `````` efield & operator+= (const efield & rhs) `````` `296` `````` { `````` ```297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319``` `````` //if this field and the source field represent the same regions in space if(R[0] == rhs.R[0] && R[1] == rhs.R[1] && pos == rhs.pos) { int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //sum the fields gpu_efield_sum <<>> (X, rhs.X, R[0], R[1]); if(Y != NULL) { gpu_efield_sum <<>> (Y, rhs.Y, R[0], R[1]); gpu_efield_sum <<>> (Z, rhs.Z, R[0], R[1]); } } else { std::cout<<"ERROR in efield: The two summed fields do not represent the same geometry."< & operator= (const rts::beam & rhs) `````` ```326 327``` `````` { //get a vector of monte-carlo samples `````` `328` `````` std::vector< rts::planewave > p_list = rhs.mc(); `````` ```329 330 331 332 333 334 335``` `````` clear(); //clear any previous field data for(unsigned int i = 0; i < p_list.size(); i++) from_planewave(p_list[i]); return *this; } `````` ```336 337``` `````` //return a scalar field representing field magnitude `````` `338` `````` realfield mag() `````` `339` `````` { `````` ```340 341``` `````` int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); `````` `342` `````` `````` `343` `````` //create a scalar field to store the result `````` `344` `````` realfield M(R[0], R[1]); `````` `345` `````` `````` ```346 347 348``` `````` //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); `````` `349` `````` `````` ```350 351``` `````` //compute the magnitude and store it in a scalar field gpu_efield_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); `````` ```352 353``` `````` return M; `````` ```354 355``` `````` } `````` `356` `````` //return a sacalar field representing the field magnitude at an infinitely small point in time `````` `357` `````` realfield real_mag() `````` ```358 359 360 361 362``` `````` { int maxThreads = rts::maxThreadsPerBlock(); //compute the optimal block size int SQRT_BLOCK = (int)std::sqrt((float)maxThreads); //create a scalar field to store the result `````` `363` `````` realfield M(R[0], R[1]); `````` ```364 365 366 367 368 369 370 371 372 373 374``` `````` //create one thread for each detector pixel dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); dim3 dimGrid((R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); //compute the magnitude and store it in a scalar field gpu_efield_real_magnitude <<>> (X, Y, Z, R[0], R[1], M.ptr(0)); return M; } `````` `375` `````` //return a vector field representing field polarization `````` `376` `````` realfield polarization() `````` `377` `````` { `````` `378` `````` if(!vector) `````` ```379 380 381 382 383 384 385 386 387 388 389 390``` `````` { std::cout<<"ERROR: Cannot compute polarization of a scalar field."< Pol(R[0], R[1]); //create a vector field to store the result `````` ```392 393``` `````` //compute the polarization and store it in the vector field `````` `394` `````` gpu_efield_polarization <<>> (X, Y, Z, R[0], R[1], Pol.ptr(0), Pol.ptr(1), Pol.ptr(2)); `````` ```395 396 397 398``` `````` return Pol; //return the vector field } `````` `399` `````` ////////FRIENDSHIP `````` `400` `````` //friend void operator<< (rts::efield &ef, rts::halfspace hs); `````` `401` `````` `````` ```402 403 404 405 406``` `````` }; `````` ```407 408``` `````` } //end namespace rts `````` `409` `````` #endif ``````