Commit ca41e27d4f34651d16b393be7b8ed883503c10c7

Authored by David Mayerich
1 parent 2a10ecf4

Linux edits for BIMSim

Showing 2 changed files with 11 additions and 8 deletions   Show diff stats
stim/optics/scalarfield.h
@@ -304,7 +304,7 @@ protected: @@ -304,7 +304,7 @@ protected:
304 cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]); 304 cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]);
305 } 305 }
306 306
307 - void from_kspace(){ 307 + void from_kspace(T& kx, T& ky){
308 kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space 308 kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space
309 ky = stim::TAU * R[1] / Y.len(); 309 ky = stim::TAU * R[1] / Y.len();
310 T x, y; 310 T x, y;
@@ -473,7 +473,7 @@ public: @@ -473,7 +473,7 @@ public:
473 if(loc == CPUmem) ss<<"CPU"; 473 if(loc == CPUmem) ss<<"CPU";
474 else ss<<"GPU"; 474 else ss<<"GPU";
475 475
476 - ss<<endl; 476 + ss<<std::endl;
477 return ss.str(); 477 return ss.str();
478 } 478 }
479 479
stim/optics/scalarmie.h
@@ -117,7 +117,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* @@ -117,7 +117,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T*
117 stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed 117 stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed
118 int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops 118 int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops
119 119
120 - #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers 120 + //unroll LOCAL_NL + 1
  121 + #pragma unroll 17 //copy the first LOCAL_NL+1 h_l * B_l components to registers
121 for(l = 0; l <= LOCAL_NL; l++) 122 for(l = 0; l <= LOCAL_NL; l++)
122 hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); 123 hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
123 124
@@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* @@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T*
134 Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation 135 Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation
135 Ei += Ew * hlBl[1] * Pl_1; 136 Ei += Ew * hlBl[1] * Pl_1;
136 137
137 - #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) 138 + //LOCAL_NL - 1
  139 + #pragma unroll 15 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
138 for(l = 2; l <= LOCAL_NL; l++){ 140 for(l = 2; l <= LOCAL_NL; l++){
139 Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) 141 Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs)
140 Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order 142 Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order
@@ -734,6 +736,7 @@ public: @@ -734,6 +736,7 @@ public:
734 736
735 template<typename T> 737 template<typename T>
736 class scalarcluster : public std::vector< scalarmie<T> > { 738 class scalarcluster : public std::vector< scalarmie<T> > {
  739 +
737 public: 740 public:
738 741
739 void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000) { 742 void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000) {
@@ -745,10 +748,10 @@ public: @@ -745,10 +748,10 @@ public:
745 T radius; 748 T radius;
746 stim::complex<T> n; 749 stim::complex<T> n;
747 stim::vec3<T> c; 750 stim::vec3<T> c;
748 - for (size_t si = 0; si < size(); si++) { //for each sphere in the cluster  
749 - radius = at(si).radius;  
750 - n = at(si).n;  
751 - c = at(si).c; 751 + for (size_t si = 0; si < std::vector< scalarmie<T> >::size(); si++) { //for each sphere in the cluster
  752 + radius = std::vector< scalarmie<T> >::at(si).radius;
  753 + n = std::vector< scalarmie<T> >::at(si).n;
  754 + c = std::vector< scalarmie<T> >::at(si).c;
752 if (E.gpu()) { 755 if (E.gpu()) {
753 stim::gpu_scalar_mie_scatter<float>(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing()); 756 stim::gpu_scalar_mie_scatter<float>(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing());
754 } 757 }