diff --git a/stim/optics/scalarfield.h b/stim/optics/scalarfield.h index a7a5073..231342e 100644 --- a/stim/optics/scalarfield.h +++ b/stim/optics/scalarfield.h @@ -304,7 +304,7 @@ protected: cpu_scalar_to_kspace(E, kx, ky, E, X.len(), Y.len(), R[0], R[1]); } - void from_kspace(){ + void from_kspace(T& kx, T& ky){ kx = stim::TAU * R[0] / X.len(); //calculate the width of the momentum space ky = stim::TAU * R[1] / Y.len(); T x, y; @@ -473,7 +473,7 @@ public: if(loc == CPUmem) ss<<"CPU"; else ss<<"GPU"; - ss<* E, size_t N, T* x, T* stim::complex hlBl[LOCAL_NL+1]; //the first LOCAL_NL components are stored in registers for speed int shared_start = threadIdx.x * (Nl - LOCAL_NL); //wrap up some operations so that they aren't done in the main loops - #pragma unroll LOCAL_NL+1 //copy the first LOCAL_NL+1 h_l * B_l components to registers + //unroll LOCAL_NL + 1 + #pragma unroll 17 //copy the first LOCAL_NL+1 h_l * B_l components to registers for(l = 0; l <= LOCAL_NL; l++) hlBl[l] = clerp( hB[n0j + l], hB[n1j + l], alpha ); @@ -134,7 +135,8 @@ __global__ void cuda_scalar_mie_scatter(stim::complex* E, size_t N, T* x, T* Ei += Ew * hlBl[0] * Pl_2; //unroll the first two orders using the initial steps of the Legendre recursive relation Ei += Ew * hlBl[1] * Pl_1; - #pragma unroll LOCAL_NL-1 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) + //LOCAL_NL - 1 + #pragma unroll 15 //unroll the next LOCAL_NL-1 loops for speed (iterating through the components in the register file) for(l = 2; l <= LOCAL_NL; l++){ Pl = ( (2 * (l-1) + 1) * cos_phi * Pl_1 - (l-1) * Pl_2 ) / (l); //calculate the next step in the Legendre polynomial recursive relation (this is where most of the computation occurs) Ei += Ew * hlBl[l] * Pl; //calculate and sum the current field order @@ -734,6 +736,7 @@ public: template class scalarcluster : public std::vector< scalarmie > { + public: void eval(stim::scalarfield& E, stim::scalarbeam b, int order = 500, int samples = 1000) { @@ -745,10 +748,10 @@ public: T radius; stim::complex n; stim::vec3 c; - for (size_t si = 0; si < size(); si++) { //for each sphere in the cluster - radius = at(si).radius; - n = at(si).n; - c = at(si).c; + for (size_t si = 0; si < std::vector< scalarmie >::size(); si++) { //for each sphere in the cluster + radius = std::vector< scalarmie >::at(si).radius; + n = std::vector< scalarmie >::at(si).n; + c = std::vector< scalarmie >::at(si).c; if (E.gpu()) { stim::gpu_scalar_mie_scatter(E.ptr(), E.size(), E.x(), E.y(), E.z(), wave_array, radius, n, c, E.spacing()); } -- libgit2 0.21.4