Commit ca41e27d4f34651d16b393be7b8ed883503c10c7
1 parent
2a10ecf4
Linux edits for BIMSim
Showing 2 changed files with 11 additions and 8 deletions
Show diff stats
stim/optics/scalarfield.h
... | ... | @@ -304,7 +304,7 @@ protected: |
304 | 304 | cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]); |
305 | 305 | } |
306 | 306 | |
307 | - void from_kspace(){ | |
307 | + void from_kspace(T& kx, T& ky){ | |
308 | 308 | kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space |
309 | 309 | ky = stim::TAU * R[1] / Y.len(); |
310 | 310 | T x, y; |
... | ... | @@ -473,7 +473,7 @@ public: |
473 | 473 | if(loc == CPUmem) ss<<"CPU"; |
474 | 474 | else ss<<"GPU"; |
475 | 475 | |
476 | - ss<<endl; | |
476 | + ss<<std::endl; | |
477 | 477 | return ss.str(); |
478 | 478 | } |
479 | 479 | ... | ... |
stim/optics/scalarmie.h
... | ... | @@ -117,7 +117,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* |
117 | 117 | stim::complex<T> hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed |
118 | 118 | int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops |
119 | 119 | |
120 | - #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers | |
120 | + //unroll LOCAL_NL + 1 | |
121 | + #pragma unroll 17 //copy the first LOCAL_NL+1 h_l * B_l components to registers | |
121 | 122 | for(l = 0; l <= LOCAL_NL; l++) |
122 | 123 | hlBl[l] = clerp<T>( hB[n0j + l], hB[n1j + l], alpha ); |
123 | 124 | |
... | ... | @@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex<T>* E, size_t N, T* x, T* |
134 | 135 | Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation |
135 | 136 | Ei += Ew * hlBl[1] * Pl_1; |
136 | 137 | |
137 | - #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) | |
138 | + //LOCAL_NL - 1 | |
139 | + #pragma unroll 15 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) | |
138 | 140 | for(l = 2; l <= LOCAL_NL; l++){ |
139 | 141 | Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) |
140 | 142 | Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order |
... | ... | @@ -734,6 +736,7 @@ public: |
734 | 736 | |
735 | 737 | template<typename T> |
736 | 738 | class scalarcluster : public std::vector< scalarmie<T> > { |
739 | + | |
737 | 740 | public: |
738 | 741 | |
739 | 742 | void eval(stim::scalarfield<T>& E, stim::scalarbeam<T> b, int order = 500, int samples = 1000) { |
... | ... | @@ -745,10 +748,10 @@ public: |
745 | 748 | T radius; |
746 | 749 | stim::complex<T> n; |
747 | 750 | stim::vec3<T> c; |
748 | - for (size_t si = 0; si < size(); si++) { //for each sphere in the cluster | |
749 | - radius = at(si).radius; | |
750 | - n = at(si).n; | |
751 | - c = at(si).c; | |
751 | + for (size_t si = 0; si < std::vector< scalarmie<T> >::size(); si++) { //for each sphere in the cluster | |
752 | + radius = std::vector< scalarmie<T> >::at(si).radius; | |
753 | + n = std::vector< scalarmie<T> >::at(si).n; | |
754 | + c = std::vector< scalarmie<T> >::at(si).c; | |
752 | 755 | if (E.gpu()) { |
753 | 756 | stim::gpu_scalar_mie_scatter<float>(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing()); |
754 | 757 | } | ... | ... |