Commit ca41e27d4f34651d16b393be7b8ed883503c10c7

Authored by David Mayerich
1 parent 2a10ecf4

Linux edits for BIMSim

Showing 2 changed files with 11 additions and 8 deletions   Show diff stats
stim/optics/scalarfield.h
... ... @@ -304,7 +304,7 @@ protected:
304 304 cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]);
305 305 }
306 306  
307   - void from_kspace(){
  307 + void from_kspace(T& kx, T& ky){
308 308 kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space
309 309 ky = stim::TAU * R[1] / Y.len();
310 310 T x, y;
... ... @@ -473,7 +473,7 @@ public:
473 473 if(loc == CPUmem) ss<<"CPU";
474 474 else ss<<"GPU";
475 475  
476   - ss<<endl;
  476 + ss<<std::endl;
477 477 return ss.str();
478 478 }
479 479  
... ...
stim/optics/scalarmie.h
... ... @@ -117,7 +117,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T*
117 117 stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed
118 118 int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops
119 119  
120   - #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers
  120 + //unroll LOCAL_NL + 1
  121 + #pragma unroll 17 //copy the first LOCAL_NL+1 h_l * B_l components to registers
121 122 for(l = 0; l <= LOCAL_NL; l++)
122 123 hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha );
123 124  
... ... @@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T*
134 135 Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation
135 136 Ei += Ew * hlBl[1] * Pl_1;
136 137  
137   - #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
  138 + //LOCAL_NL - 1
  139 + #pragma unroll 15 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file)
138 140 for(l = 2; l <= LOCAL_NL; l++){
139 141 Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs)
140 142 Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order
... ... @@ -734,6 +736,7 @@ public:
734 736  
735 737 template<typename T>
736 738 class scalarcluster : public std::vector< scalarmie<T> > {
  739 +
737 740 public:
738 741  
739 742 void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000) {
... ... @@ -745,10 +748,10 @@ public:
745 748 T radius;
746 749 stim::complex<T> n;
747 750 stim::vec3<T> c;
748   - for (size_t si = 0; si < size(); si++) { //for each sphere in the cluster
749   - radius = at(si).radius;
750   - n = at(si).n;
751   - c = at(si).c;
  751 + for (size_t si = 0; si < std::vector< scalarmie<T> >::size(); si++) { //for each sphere in the cluster
  752 + radius = std::vector< scalarmie<T> >::at(si).radius;
  753 + n = std::vector< scalarmie<T> >::at(si).n;
  754 + c = std::vector< scalarmie<T> >::at(si).c;
752 755 if (E.gpu()) {
753 756 stim::gpu_scalar_mie_scatter<float>(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing());
754 757 }
... ...