Commit ca41e27d4f34651d16b393be7b8ed883503c10c7
1 parent: 2a10ecf4
Linux edits for BIMSim
Showing 2 changed files with 11 additions and 8 deletions
Show diff stats
stim/optics/scalarfield.h
@@ -304,7 +304,7 @@ protected: | @@ -304,7 +304,7 @@ protected: | ||
304 | cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]); | 304 | cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]); |
305 | } | 305 | } |
306 | 306 | ||
307 | - void from_kspace(){ | 307 | + void from_kspace(T& kx, T& ky){ |
308 | kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space | 308 | kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space |
309 | ky = stim::TAU * R[1] / Y.len(); | 309 | ky = stim::TAU * R[1] / Y.len(); |
310 | T x, y; | 310 | T x, y; |
@@ -473,7 +473,7 @@ public: | @@ -473,7 +473,7 @@ public: | ||
473 | if(loc == CPUmem) ss<<"CPU"; | 473 | if(loc == CPUmem) ss<<"CPU"; |
474 | else ss<<"GPU"; | 474 | else ss<<"GPU"; |
475 | 475 | ||
476 | - ss<<endl; | 476 | + ss<<std::endl; |
477 | return ss.str(); | 477 | return ss.str(); |
478 | } | 478 | } |
479 | 479 |
stim/optics/scalarmie.h
@@ -117,7 +117,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | @@ -117,7 +117,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | ||
117 | stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed | 117 | stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed |
118 | int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops | 118 | int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops |
119 | 119 | ||
120 | - #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers | 120 | + //unroll LOCAL_NL + 1 |
121 | + #pragma unroll 17 //copy the first LOCAL_NL+1 h_l * B_l components to registers | ||
121 | for(l = 0; l <= LOCAL_NL; l++) | 122 | for(l = 0; l <= LOCAL_NL; l++) |
122 | hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); | 123 | hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); |
123 | 124 | ||
@@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | @@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* | ||
134 | Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation | 135 | Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation |
135 | Ei += Ew * hlBl[1] * Pl_1; | 136 | Ei += Ew * hlBl[1] * Pl_1; |
136 | 137 | ||
137 | - #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) | 138 | + //LOCAL_NL - 1 |
139 | + #pragma unroll 15 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) | ||
138 | for(l = 2; l <= LOCAL_NL; l++){ | 140 | for(l = 2; l <= LOCAL_NL; l++){ |
139 | Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) | 141 | Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) |
140 | Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order | 142 | Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order |
@@ -734,6 +736,7 @@ public: | @@ -734,6 +736,7 @@ public: | ||
734 | 736 | ||
735 | template<typename T> | 737 | template<typename T> |
736 | class scalarcluster : public std::vector< scalarmie<T> > { | 738 | class scalarcluster : public std::vector< scalarmie<T> > { |
739 | + | ||
737 | public: | 740 | public: |
738 | 741 | ||
739 | void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000) { | 742 | void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000) { |
@@ -745,10 +748,10 @@ public: | @@ -745,10 +748,10 @@ public: | ||
745 | T radius; | 748 | T radius; |
746 | stim::complex<T> n; | 749 | stim::complex<T> n; |
747 | stim::vec3<T> c; | 750 | stim::vec3<T> c; |
748 | - for (size_t si = 0; si < size(); si++) { //for each sphere in the cluster | ||
749 | - radius = at(si).radius; | ||
750 | - n = at(si).n; | ||
751 | - c = at(si).c; | 751 | + for (size_t si = 0; si < std::vector< scalarmie<T> >::size(); si++) { //for each sphere in the cluster |
752 | + radius = std::vector< scalarmie<T> >::at(si).radius; | ||
753 | + n = std::vector< scalarmie<T> >::at(si).n; | ||
754 | + c = std::vector< scalarmie<T> >::at(si).c; | ||
752 | if (E.gpu()) { | 755 | if (E.gpu()) { |
753 | stim::gpu_scalar_mie_scatter<float>(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing()); | 756 | stim::gpu_scalar_mie_scatter<float>(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing()); |
754 | } | 757 | } |