diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..7df99da --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,95 @@ +#Specify the version being used aswell as the language +cmake_minimum_required(VERSION 2.8) +#Name your project here +project(bimsim) + +#set the module directory +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") + +#find BOOST +set(Boost_USE_STATIC_LIBS ON) +set(Boost_USE_MULTITHREADED ON) +set(Boost_USE_STATIC_RUNTIME OFF) +find_package( Boost 1.49.0 COMPONENTS program_options ) + +#find the Qt4 +find_package(Qt4 REQUIRED) +include_directories(${QT_INCLUDE_DIRECTORY}) +include(${QT_USE_FILE}) + +#set up CUDA +find_package(CUDA) + +#find OpenGL +#find_package(OpenGL REQUIRED) + +#find GLUT +#set(GLUT_ROOT_PATH $ENV{GLUT_ROOT_PATH}) +#find_package(GLUT REQUIRED) + +#find GLEW +#find_package(GLEW REQUIRED) + +#add Qt OpenGL stuff +#set(QT_USE_QTOPENGL TRUE) + +#ask the user for the RTS location +set(RTS_ROOT_PATH $ENV{RTS_ROOT_PATH}) +find_package(RTS REQUIRED) + +#set the include directories +include_directories( + ${CMAKE_CURRENT_BINARY_DIR} + ${QT_INCLUDES} + ${QT_QTOPENGL_INCLUDE_DIR} +# ${OPENGL_INCLUDE_DIR} +# ${GLEW_INCLUDE_PATH} +# ${GLUT_INCLUDE_DIR} + ${RTS_INCLUDE_DIR} + ${Boost_INCLUDE_DIR} +) + +#enable warnings +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) + add_definitions(-Wall) +endif() + +#Assign source files to the appropriate variables +file(GLOB SRC_CPP "*.cpp") +file(GLOB SRC_H "*.h") +#file(GLOB SRC_UI "*.ui") +#file(GLOB SRC_QRC "*.qrc") +file(GLOB SRC_CU "*.cu") + +#set up copying data files +#configure_file(rtsSU8_k.txt ${CMAKE_CURRENT_BINARY_DIR}/rtsSU8_k.txt @ONLY) +#configure_file(rtsSU8_n.txt ${CMAKE_CURRENT_BINARY_DIR}/rtsSU8_n.txt @ONLY) +#configure_file(SurfaceMagnitude.glsl ${CMAKE_CURRENT_BINARY_DIR}/SurfaceMagnitude.glsl @ONLY) +#configure_file(SurfaceDeform.glsl ${CMAKE_CURRENT_BINARY_DIR}/SurfaceDeform.glsl @ONLY) + +#determine which source files have to be moc'd +#Qt4_wrap_cpp(UI_MOC ${SRC_H}) +#Qt4_wrap_ui(UI_H ${SRC_UI}) +#Qt4_add_resources(ALL_RCC ${ALL_QRC}) + +#moc the necessary files +#Qt4_automoc(${ALL_CPP}) + +#source_group(QtMoc FILES ${UI_MOC}) +#source_group(QtUI FILES ${SRC_UI}) + +#create an executable +cuda_add_executable(bimsim + ${SRC_CPP} + ${SRC_H} +# ${UI_H} +# ${UI_MOC} +# ${ALL_RCC} + ${SRC_CU} +) + +#set the link libraries +target_link_libraries(bimsim ${QT_LIBRARIES} ${QT_QTOPENGL_LIBRARY} ${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY} ${GLEW_LIBRARY} ${CUDA_cufft_LIBRARY} ${CUDA_cublas_LIBRARY} ${GLUT_glut_LIBRARY} ${Boost_LIBRARIES}) + + + diff --git a/FindCUDASDK.cmake b/FindCUDASDK.cmake new file mode 100644 index 0000000..f3bee90 --- /dev/null +++ b/FindCUDASDK.cmake @@ -0,0 +1,142 @@ +# +# The script defines the following variables: +# +############################################################################## +# Note: Removed everything related to CUDA_SDK_ROOT_DIR and only left this as +# a possible environment variable to set the SDK directory. +# Include file will be: CUDA_CUT_INCLUDE_DIR +# Cutil library: CUDA_CUT_LIBRARY +############################################################################## +# +# +# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the +# SDK. This script will not directly support finding +# specific libraries or headers, as that isn't +# supported by NVIDIA. If you want to change +# libraries when the path changes see the +# FindCUDA.cmake script for an example of how to clear +# these variables. There are also examples of how to +# use the CUDA_SDK_ROOT_DIR to locate headers or +# libraries, if you so choose (at your own risk). +# +# This code is licensed under the MIT License. See the FindCUDASDK.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +############################################################################### + +# FindCUDASDK.cmake + +# # Check to see if the CUDA_SDK_ROOT_DIR has changed, +# # if it has then clear the cache variable, so that it will be detected again. +# if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") +# # No specific variables to catch. Use this kind of code before calling +# # find_package(CUDA) to clean up any variables that may depend on this path. +# +# # unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE) +# # unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE) +# endif() +# +# ######################## +# # Look for the SDK stuff +# find_path(CUDA_SDK_ROOT_DIR cutil.h +# PATH_SUFFIXES "common/inc" "C/common/inc" +# "$ENV{NVSDKCUDA_ROOT}" +# "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" +# "/Developer/GPU\ Computing/C" +# ) +# +# # fallback method for determining CUDA_SDK_ROOT_DIR in case the previous one failed! +# if (NOT CUDA_SDK_ROOT_DIR) +# find_path(CUDA_SDK_ROOT_DIR C/common/inc/cutil.h +# "$ENV{NVSDKCUDA_ROOT}" +# "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" +# "/Developer/GPU\ Computing/C" +# ) +# endif() + +# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the +# environment variables. +set(CUDA_SDK_SEARCH_PATH + "${CUDA_SDK_ROOT_DIR}" + "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" + "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2" + "${CUDA_TOOLKIT_ROOT_DIR}/NV_SDK" + "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK" + "$ENV{HOME}/NVIDIA_CUDA_SDK" + "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX" + "$ENV{HOME}/NVIDIA_GPU_Computing_SDK" + "/Developer/CUDA" + ) + +# Find include file from the CUDA_SDK_SEARCH_PATH + +find_path(CUDA_CUT_INCLUDE_DIR + cutil.h + PATHS ${CUDA_SDK_SEARCH_PATH} + PATH_SUFFIXES "common/inc" "C/common/inc" + DOC "Location of cutil.h" + NO_DEFAULT_PATH + ) +# Now search system paths +find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h") + +# mark_as_advanced(CUDA_CUT_INCLUDE_DIR) + + +# Example of how to find a library in the CUDA_SDK_ROOT_DIR + +# cutil library is called cutil64 for 64 bit builds on windows. We don't want +# to get these confused, so we are setting the name based on the word size of +# the build. + +# New library might be called cutil_x86_64 ! + +if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(cuda_cutil_name cutil64) +else(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(cuda_cutil_name cutil32) +endif(CMAKE_SIZEOF_VOID_P EQUAL 8) + +find_library(CUDA_CUT_LIBRARY + NAMES ${cuda_cutil_name} cutil cutil_x86_64 cutil_i386 + PATHS ${CUDA_SDK_SEARCH_PATH} + # The new version of the sdk shows up in common/lib, but the old one is in lib + # The very newest installation Path of the SDK is in subdirectory 'C'. Please add this Path to the possible suffixes. + PATH_SUFFIXES "C/lib" "common/lib" "lib" "C/common/lib" "common/lib" + DOC "Location of cutil library" + NO_DEFAULT_PATH + ) +# # Now search system paths +# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library") +mark_as_advanced(CUDA_CUT_LIBRARY) +set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY}) + +############################# +# Check for required components +if(CUDA_CUT_INCLUDE_DIR) + set(CUDASDK_FOUND TRUE) +endif(CUDA_CUT_INCLUDE_DIR) + +# set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL +# "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) diff --git a/FindGLEW.cmake b/FindGLEW.cmake new file mode 100644 index 0000000..9fe32e8 --- /dev/null +++ b/FindGLEW.cmake @@ -0,0 +1,51 @@ +# +# Try to find GLEW library and include path. +# Once done this will define +# +# GLEW_FOUND +# GLEW_INCLUDE_PATH +# GLEW_LIBRARY +# + +IF (WIN32) + FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h + $ENV{PROGRAMFILES}/GLEW/include + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/include + DOC "The directory where GL/glew.h resides") + FIND_LIBRARY( GLEW_LIBRARY + NAMES glew GLEW glew32 glew32s + PATHS + $ENV{PROGRAMFILES}/GLEW/lib + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib + DOC "The GLEW library") +ELSE (WIN32) + FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h + /usr/include + /usr/local/include + /sw/include + /opt/local/include + DOC "The directory where GL/glew.h resides") + FIND_LIBRARY( GLEW_LIBRARY + NAMES GLEW glew + PATHS + /usr/lib64 + /usr/lib + /usr/local/lib64 + /usr/local/lib + /sw/lib + /opt/local/lib + DOC "The GLEW library") +ENDIF (WIN32) + +IF (GLEW_INCLUDE_PATH) + SET( GLEW_FOUND 1 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") +ELSE (GLEW_INCLUDE_PATH) + SET( GLEW_FOUND 0 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") +ENDIF (GLEW_INCLUDE_PATH) + +MARK_AS_ADVANCED( + GLEW_FOUND + GLEW_INCLUDE_PATH + GLEW_LIBRARY +) \ No newline at end of file diff --git a/FindRTS.cmake b/FindRTS.cmake new file mode 100644 index 0000000..fbfcc44 --- /dev/null +++ b/FindRTS.cmake @@ -0,0 +1,14 @@ +# Tries to find the RTS include directory + + FIND_PATH( RTS_INCLUDE_DIR NAMES rts_glShaderProgram.h + PATHS + ${CMAKE_CURRENT_SOURCE_DIR}/rts + ${RTS_ROOT_PATH} +) + +IF (RTS_FOUND) + #The following deprecated settings are for backwards compatibility with CMake1.4 + SET (RTS_INCLUDE_PATH ${RTS_INCLUDE_DIR}) +ENDIF(RTS_FOUND) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS(RTS REQUIRED_VARS TRUE RTS_INCLUDE_DIR) diff --git a/README.md b/README.md new file mode 100644 index 0000000..9aefa3e --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +BIM-Sim computes the interactions between an incident electromagnetic field and spherical scatterers using Mie theory. + +A Windows 32 executable is provided in the Win32 directory. All of the necessary runtime libraries are included. Start by running 'ricalc -h' for command-line options. + +This software is designed to be used with CMake, so we recommend using it to build the application. + +This software requires the following libraries for compiling: + +CUDA (including CUFFT and CUBLAS) - NVidia's GPU programming toolkit -- https://developer.nvidia.com/ + +RTS - Real-Time Scraps C++ library (my codebase) -- https://github.com/dmayerich/RTS + +Boost C++ Library (specifically the Program Options library) -- http://www.boost.org/ + +Qt (Nokia's GUI Library) -- https://qt-project.org/ + + + + + + diff --git a/bessel.h b/bessel.h new file mode 100644 index 0000000..a9055df --- /dev/null +++ b/bessel.h @@ -0,0 +1,51 @@ +#ifndef bessH +#define bessH +#define _USE_MATH_DEFINES +#include +#include +using namespace std; +#define eps 1e-15 +#define el 0.5772156649015329 + +int msta1(double x,int mp); +int msta2(double x,int n,int mp); +int bessjy01a(double x,double &j0,double &j1,double &y0,double &y1, + double &j0p,double &j1p,double &y0p,double &y1p); +int bessjy01b(double x,double &j0,double &j1,double &y0,double &y1, + double &j0p,double &j1p,double &y0p,double &y1p); +int bessjyna(int n,double x,int &nm,double *jn,double *yn, + double *jnp,double *ynp); +int bessjynb(int n,double x,int &nm,double *jn,double *yn, + double *jnp,double *ynp); +int bessjyv(double v,double x,double &vm,double *jv,double *yv, + double *jvp,double *yvp); +int bessik01a(double x,double &i0,double &i1,double &k0,double &k1, + double &i0p,double &i1p,double &k0p,double &k1p); +int bessik01b(double x,double &i0,double &i1,double &k0,double &k1, + double &i0p,double &i1p,double &k0p,double &k1p); +int bessikna(int n,double x,int &nm,double *in,double *kn, + double *inp,double *knp); +int bessiknb(int n,double x,int &nm,double *in,double *kn, + double *inp,double *knp); +int bessikv(double v,double x,double &vm,double *iv,double *kv, + double *ivp,double *kvp); +int cbessjy01(complex z,complex &cj0,complex &cj1, + complex &cy0,complex &cy1,complex &cj0p, + complex &cj1p,complex &cy0p,complex &cy1p); +int cbessjyna(int n,complex z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp); +int cbessjynb(int n,complex z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp); +int cbessik01(complexz,complex&ci0,complex&ci1, + complex&ck0,complex&ck1,complex&ci0p, + complex&ci1p,complex&ck0p,complex&ck1p); +int cbessikna(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp); +int cbessiknb(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp); +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); +int cbessikv(double v,complexz,double &vm,complex *civ, + complex *ckv,complex *civp,complex *ckvp); + +#endif diff --git a/bessik.cpp b/bessik.cpp new file mode 100644 index 0000000..322000b --- /dev/null +++ b/bessik.cpp @@ -0,0 +1,487 @@ +// bessik.cpp -- computation of modified Bessel functions In, Kn +// and their derivatives. Algorithms and coefficient values from +// "Computation of Special Functions", Zhang and Jin, John +// Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +#define _USE_MATH_DEFINES +#include +#include "bessel.h" + +double gamma(double x); + +int bessik01a(double x,double &i0,double &i1,double &k0,double &k1, + double &i0p,double &i1p,double &k0p,double &k1p) +{ + double r,x2,ca,cb,ct,ww,w0,xr,xr2; + int k,kz; + static double a[] = { + 0.125, + 7.03125e-2, + 7.32421875e-2, + 1.1215209960938e-1, + 2.2710800170898e-1, + 5.7250142097473e-1, + 1.7277275025845, + 6.0740420012735, + 2.4380529699556e1, + 1.1001714026925e2, + 5.5133589612202e2, + 3.0380905109224e3}; + static double b[] = { + -0.375, + -1.171875e-1, + -1.025390625e-1, + -1.4419555664063e-1, + -2.7757644653320e-1, + -6.7659258842468e-1, + -1.9935317337513, + -6.8839142681099, + -2.7248827311269e1, + -1.2159789187654e2, + -6.0384407670507e2, + -3.3022722944809e3}; + static double a1[] = { + 0.125, + 0.2109375, + 1.0986328125, + 1.1775970458984e1, + 2.1461706161499e2, + 5.9511522710323e3, + 2.3347645606175e5, + 1.2312234987631e7}; + + if (x < 0.0) return 1; + if (x == 0.0) { + i0 = 1.0; + i1 = 0.0; + k0 = 1e308; + k1 = 1e308; + i0p = 0.0; + i1p = 0.5; + k0p = -1e308; + k1p = -1e308; + return 0; + } + x2 = x*x; + if (x <= 18.0) { + i0 = 1.0; + r = 1.0; + for (k=1;k<=50;k++) { + r *= 0.25*x2/(k*k); + i0 += r; + if (fabs(r/i0) < eps) break; + } + i1 = 1.0; + r = 1.0; + for (k=1;k<=50;k++) { + r *= 0.25*x2/(k*(k+1)); + i1 += r; + if (fabs(r/i1) < eps) break; + } + i1 *= 0.5*x; + } + else { + if (x >= 50.0) kz = 7; + else if (x >= 35.0) kz = 9; + else kz = 12; + ca = exp(x)/sqrt(2.0*M_PI*x); + i0 = 1.0; + xr = 1.0/x; + for (k=0;k 40.0) && (n < (int)(0.25*x))) { + h0 = bi0; + h1 = bi1; + for (k=2;k<=n;k++) { + h = -2.0*(k-1.0)*h1/x+h0; + in[k] = h; + h0 = h1; + h1 = h; + } + } + else { + m = msta1(x,200); + if (m < n) nm = m; + else m = msta2(x,n,15); + f0 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(k+1.0)*f1/x+f0; + if (x <= nm) in[k] = f; + f0 = f1; + f1 = f; + } + s0 = bi0/f; + for (k=0;k<=m;k++) { + in[k] *= s0; + } + } + g0 = bk0; + g1 = bk1; + for (k=2;k<=nm;k++) { + g = 2.0*(k-1.0)*g1/x+g0; + kn[k] = g; + g0 = g1; + g1 = g; + } + for (k=2;k<=nm;k++) { + inp[k] = in[k-1]-k*in[k]/x; + knp[k] = -kn[k-1]-k*kn[k]/x; + } + return 0; +} +int bessiknb(int n,double x,int &nm,double *in,double *kn, + double *inp,double *knp) +{ + double s0,bs,f,f0,f1,sk0,a0,bkl,vt,r,g,g0,g1; + int k,kz,m,l; + + if ((x < 0.0) || (n < 0)) return 1; + if (x < eps) { + for (k=0;k<=n;k++) { + in[k] = 0.0; + kn[k] = 1e308; + inp[k] = 0.0; + knp[k] = -1e308; + } + in[0] = 1.0; + inp[1] = 0.5; + return 0; + } + nm = n; + if (n == 0) nm = 1; + m = msta1(x,200); + if (m < nm) nm = m; + else m = msta2(x,nm,15); + bs = 0.0; + sk0 = 0.0; + f0 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(k+1.0)*f1/x+f0; + if (k <= nm) in[k] = f; + if ((k != 0) && (k == 2*(int)(k/2))) { + sk0 += 4.0*f/k; + } + bs += 2.0*f; + f0 = f1; + f1 = f; + } + s0 = exp(x)/(bs-f); + for (k=0;k<=nm;k++) { + in[k] *= s0; + } + if (x <= 8.0) { + kn[0] = -(log(0.5*x)+el)*in[0]+s0*sk0; + kn[1] = (1.0/x-in[1]*kn[0])/in[0]; + } + else { + a0 = sqrt(M_PI_2/x)*exp(-x); + if (x >= 200.0) kz = 6; + else if (x >= 80.0) kz = 8; + else if (x >= 25.0) kz = 10; + else kz = 16; + for (l=0;l<2;l++) { + bkl = 1.0; + vt = 4.0*l; + r = 1.0; + for (k=1;k<=kz;k++) { + r *= 0.125*(vt-pow(2.0*k-1.0,2))/(k*x); + bkl += r; + } + kn[l] = a0*bkl; + } + } + g0 = kn[0]; + g1 = kn[1]; + for (k=2;k<=nm;k++) { + g = 2.0*(k-1.0)*g1/x+g0; + kn[k] = g; + g0 = g1; + g1 = g; + } + inp[0] = in[1]; + knp[0] = -kn[1]; + for (k=1;k<=nm;k++) { + inp[k] = in[k-1]-k*in[k]/x; + knp[k] = -kn[k-1]-k*kn[k]/x; + } + return 0; +} + +// The following program computes the modified Bessel functions +// Iv(x) and Kv(x) for arbitrary positive order. For negative +// order use: +// +// I-v(x) = Iv(x) + 2/pi sin(v pi) Kv(x) +// K-v(x) = Kv(x) +// +int bessikv(double v,double x,double &vm,double *iv,double *kv, + double *ivp,double *kvp) +{ + double x2,v0,piv,vt,a1,v0p,gap,r,bi0,ca,sum; + double f,f1,f2,ct,cs,wa,gan,ww,w0,v0n; + double r1,r2,bk0,bk1,bk2,a2,cb; + int n,k,kz,m; + + if ((v < 0.0) || (x < 0.0)) return 1; + x2 = x*x; + n = (int)v; + v0 = v-n; + if (n == 0) n = 1; + if (x == 0.0) { + for (k=0;k<=n;k++) { + iv[k] = 0.0; + kv[k] = -1e308; + ivp[k] = 0.0; + kvp[k] = 1e308; + } + if (v0 == 0.0) { + iv[0] = 1.0; + ivp[1] = 0.5; + } + vm = v; + return 0; + } + piv = M_PI*v0; + vt = 4.0*v0*v0; + if (v0 == 0.0) { + a1 = 1.0; + } + else { + v0p = 1.0+v0; + gap = gamma(v0p); + a1 = pow(0.5*x,v0)/gap; + } + if (x >= 50.0) kz = 8; + else if (x >= 35.0) kz = 10; + else kz = 14; + if (x <= 18.0) { + bi0 = 1.0; + r = 1.0; + for (k=1;k<=30;k++) { + r *= 0.25*x2/(k*(k+v0)); + bi0 += r; + if (fabs(r/bi0) < eps) break; + } + bi0 *= a1; + } + else { + ca = exp(x)/sqrt(2.0*M_PI*x); + sum = 1.0; + r = 1.0; + for (k=1;k<=kz;k++) { + r *= -0.125*(vt-pow(2.0*k-1.0,2.0))/(k*x); + sum += r; + } + bi0 = ca*sum; + } + m = msta1(x,200); + if (m < n) n = m; + else m = msta2(x,n,15); + f2 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(v0+k+1.0)*f1/x+f2; + if (k <= n) iv[k] = f; + f2 = f1; + f1 = f; + } + cs = bi0/f; + for (k=0;k<=n;k++) { + iv[k] *= cs; + } + ivp[0] = v0*iv[0]/x+iv[1]; + for (k=1;k<=n;k++) { + ivp[k] = -(k+v0)*iv[k]/x+iv[k-1]; + } + ww = 0.0; + if (x <= 9.0) { + if (v0 == 0.0) { + ct = -log(0.5*x)-el; + cs = 0.0; + w0 = 0.0; + r = 1.0; + for (k=1;k<=50;k++) { + w0 += 1.0/k; + r *= 0.25*x2/(k*k); + cs += r*(w0+ct); + wa = fabs(cs); + if (fabs((wa-ww)/wa) < eps) break; + ww = wa; + } + bk0 = ct+cs; + } + else { + v0n = 1.0-v0; + gan = gamma(v0n); + a2 = 1.0/(gan*pow(0.5*x,v0)); + a1 = pow(0.5*x,v0)/gap; + sum = a2-a1; + r1 = 1.0; + r2 = 1.0; + for (k=1;k<=120;k++) { + r1 *= 0.25*x2/(k*(k-v0)); + r2 *= 0.25*x2/(k*(k+v0)); + sum += a2*r1-a1*r2; + wa = fabs(sum); + if (fabs((wa-ww)/wa) < eps) break; + ww = wa; + } + bk0 = M_PI_2*sum/sin(piv); + } + } + else { + cb = exp(-x)*sqrt(M_PI_2/x); + sum = 1.0; + r = 1.0; + for (k=1;k<=kz;k++) { + r *= 0.125*(vt-pow(2.0*k-1.0,2.0))/(k*x); + sum += r; + } + bk0 = cb*sum; + } + bk1 = (1.0/x-iv[1]*bk0)/iv[0]; + kv[0] = bk0; + kv[1] = bk1; + for (k=2;k<=n;k++) { + bk2 = 2.0*(v0+k-1.0)*bk1/x+bk0; + kv[k] = bk2; + bk0 = bk1; + bk1 = bk2; + } + kvp[0] = v0*kv[0]/x-kv[1]; + for (k=1;k<=n;k++) { + kvp[k] = -(k+v0)*kv[k]/x-kv[k-1]; + } + vm = n+v0; + return 0; +} diff --git a/bessjy.cpp b/bessjy.cpp new file mode 100644 index 0000000..2dc98af --- /dev/null +++ b/bessjy.cpp @@ -0,0 +1,706 @@ +// bessjy.cpp -- computation of Bessel functions Jn, Yn and their +// derivatives. Algorithms and coefficient values from +// "Computation of Special Functions", Zhang and Jin, John +// Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +// Note that 'math.h' provides (or should provide) values for: +// pi M_PI +// 2/pi M_2_PI +// pi/4 M_PI_4 +// pi/2 M_PI_2 +// +#define _USE_MATH_DEFINES +#include +#include "bessel.h" + +double gamma(double x); +// +// INPUT: +// double x -- argument of Bessel function +// +// OUTPUT (via address rtsPointers): +// double j0 -- Bessel function of 1st kind, 0th order +// double j1 -- Bessel function of 1st kind, 1st order +// double y0 -- Bessel function of 2nd kind, 0th order +// double y1 -- Bessel function of 2nd kind, 1st order +// double j0p -- derivative of Bessel function of 1st kind, 0th order +// double j1p -- derivative of Bessel function of 1st kind, 1st order +// double y0p -- derivative of Bessel function of 2nd kind, 0th order +// double y1p -- derivative of Bessel function of 2nd kind, 1st order +// +// RETURN: +// int error code: 0 = OK, 1 = error +// +// This algorithm computes the above functions using series expansions. +// +int bessjy01a(double x,double &j0,double &j1,double &y0,double &y1, + double &j0p,double &j1p,double &y0p,double &y1p) +{ + double x2,r,ec,w0,w1,r0,r1,cs0,cs1; + double cu,p0,q0,p1,q1,t1,t2; + int k,kz; + static double a[] = { + -7.03125e-2, + 0.112152099609375, + -0.5725014209747314, + 6.074042001273483, + -1.100171402692467e2, + 3.038090510922384e3, + -1.188384262567832e5, + 6.252951493434797e6, + -4.259392165047669e8, + 3.646840080706556e10, + -3.833534661393944e12, + 4.854014686852901e14, + -7.286857349377656e16, + 1.279721941975975e19}; + static double b[] = { + 7.32421875e-2, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1, + 5.513358961220206e2, + -1.825775547429318e4, + 8.328593040162893e5, + -5.006958953198893e7, + 3.836255180230433e9, + -3.649010818849833e11, + 4.218971570284096e13, + -5.827244631566907e15, + 9.476288099260110e17, + -1.792162323051699e20}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947, + 1.215978918765359e2, + -3.302272294480852e3, + 1.276412726461746e5, + -6.656367718817688e6, + 4.502786003050393e8, + -3.833857520742790e10, + 4.011838599133198e12, + -5.060568503314727e14, + 7.572616461117958e16, + -1.326257285320556e19}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1, + -6.038440767050702e2, + 1.971837591223663e4, + -8.902978767070678e5, + 5.310411010968522e7, + -4.043620325107754e9, + 3.827011346598605e11, + -4.406481417852278e13, + 6.065091351222699e15, + -9.833883876590679e17, + 1.855045211579828e20}; + + if (x < 0.0) return 1; + if (x == 0.0) { + j0 = 1.0; + j1 = 0.0; + y0 = -1e308; + y1 = -1e308; + j0p = 0.0; + j1p = 0.5; + y0p = 1e308; + y1p = 1e308; + return 0; + } + x2 = x*x; + if (x <= 12.0) { + j0 = 1.0; + r = 1.0; + for (k=1;k<=30;k++) { + r *= -0.25*x2/(k*k); + j0 += r; + if (fabs(r) < fabs(j0)*1e-15) break; + } + j1 = 1.0; + r = 1.0; + for (k=1;k<=30;k++) { + r *= -0.25*x2/(k*(k+1)); + j1 += r; + if (fabs(r) < fabs(j1)*1e-15) break; + } + j1 *= 0.5*x; + ec = log(0.5*x)+el; + cs0 = 0.0; + w0 = 0.0; + r0 = 1.0; + for (k=1;k<=30;k++) { + w0 += 1.0/k; + r0 *= -0.25*x2/(k*k); + r = r0 * w0; + cs0 += r; + if (fabs(r) < fabs(cs0)*1e-15) break; + } + y0 = M_2_PI*(ec*j0-cs0); + cs1 = 1.0; + w1 = 0.0; + r1 = 1.0; + for (k=1;k<=30;k++) { + w1 += 1.0/k; + r1 *= -0.25*x2/(k*(k+1)); + r = r1*(2.0*w1+1.0/(k+1)); + cs1 += r; + if (fabs(r) < fabs(cs1)*1e-15) break; + } + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1); + } + else { + if (x >= 50.0) kz = 8; // Can be changed to 10 + else if (x >= 35.0) kz = 10; // " " 12 + else kz = 12; // " " 14 + t1 = x-M_PI_4; + p0 = 1.0; + q0 = -0.125/x; + for (k=0;k=0;k--) { + f = 2.0*(k+1.0)/x*f1-f2; + if (k <= nm) jn[k] = f; + f2 = f1; + f1 = f; + } + if (fabs(bj0) > fabs(bj1)) cs = bj0/f; + else cs = bj1/f2; + for (k=0;k<=nm;k++) { + jn[k] *= cs; + } + } + for (k=2;k<=nm;k++) { + jnp[k] = jn[k-1]-k*jn[k]/x; + } + f0 = yn[0]; + f1 = yn[1]; + for (k=2;k<=nm;k++) { + f = 2.0*(k-1.0)*f1/x-f0; + yn[k] = f; + f0 = f1; + f1 = f; + } + for (k=2;k<=nm;k++) { + ynp[k] = yn[k-1]-k*yn[k]/x; + } + return 0; +} +// +// Same input and output conventions as above. Different recurrence +// relations used for 'x' < 300. +// +int bessjynb(int n,double x,int &nm,double *jn,double *yn, + double *jnp,double *ynp) +{ + double t1,t2,f,f1,f2,bj0,bj1,bjk,by0,by1,cu,s0,su,sv; + double ec,bs,byk,p0,p1,q0,q1; + static double a[] = { + -0.7031250000000000e-1, + 0.1121520996093750, + -0.5725014209747314, + 6.074042001273483}; + static double b[] = { + 0.7324218750000000e-1, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1}; + + int i,k,m; + nm = n; + if ((x < 0.0) || (n < 0)) return 1; + if (x < 1e-15) { + for (i=0;i<=n;i++) { + jn[i] = 0.0; + yn[i] = -1e308; + jnp[i] = 0.0; + ynp[i] = 1e308; + } + jn[0] = 1.0; + jnp[1] = 0.5; + return 0; + } + if (x <= 300.0 || n > (int)(0.9*x)) { + if (n == 0) nm = 1; + m = msta1(x,200); + if (m < nm) nm = m; + else m = msta2(x,nm,15); + bs = 0.0; + su = 0.0; + sv = 0.0; + f2 = 0.0; + f1 = 1.0e-100; + for (k = m;k>=0;k--) { + f = 2.0*(k+1.0)/x*f1 - f2; + if (k <= nm) jn[k] = f; + if ((k == 2*(int)(k/2)) && (k != 0)) { + bs += 2.0*f; +// su += pow(-1,k>>1)*f/(double)k; + su += (-1)*((k & 2)-1)*f/(double)k; + } + else if (k > 1) { +// sv += pow(-1,k>>1)*k*f/(k*k-1.0); + sv += (-1)*((k & 2)-1)*(double)k*f/(k*k-1.0); + } + f2 = f1; + f1 = f; + } + s0 = bs+f; + for (k=0;k<=nm;k++) { + jn[k] /= s0; + } + ec = log(0.5*x) +0.5772156649015329; + by0 = M_2_PI*(ec*jn[0]-4.0*su/s0); + yn[0] = by0; + by1 = M_2_PI*((ec-1.0)*jn[1]-jn[0]/x-4.0*sv/s0); + yn[1] = by1; + } + else { + t1 = x-M_PI_4; + p0 = 1.0; + q0 = -0.125/x; + for (k=0;k<4;k++) { + p0 += a[k]*pow(x,-2*k-2); + q0 += b[k]*pow(x,-2*k-3); + } + cu = sqrt(M_2_PI/x); + bj0 = cu*(p0*cos(t1)-q0*sin(t1)); + by0 = cu*(p0*sin(t1)+q0*cos(t1)); + jn[0] = bj0; + yn[0] = by0; + t2 = x-0.75*M_PI; + p1 = 1.0; + q1 = 0.375/x; + for (k=0;k<4;k++) { + p1 += a1[k]*pow(x,-2*k-2); + q1 += b1[k]*pow(x,-2*k-3); + } + bj1 = cu*(p1*cos(t2)-q1*sin(t2)); + by1 = cu*(p1*sin(t2)+q1*cos(t2)); + jn[1] = bj1; + yn[1] = by1; + for (k=2;k<=nm;k++) { + bjk = 2.0*(k-1.0)*bj1/x-bj0; + jn[k] = bjk; + bj0 = bj1; + bj1 = bjk; + } + } + jnp[0] = -jn[1]; + for (k=1;k<=nm;k++) { + jnp[k] = jn[k-1]-k*jn[k]/x; + } + for (k=2;k<=nm;k++) { + byk = 2.0*(k-1.0)*by1/x-by0; + yn[k] = byk; + by0 = by1; + by1 = byk; + } + ynp[0] = -yn[1]; + for (k=1;k<=nm;k++) { + ynp[k] = yn[k-1]-k*yn[k]/x; + } + return 0; + +} + +// The following routine computes Bessel Jv(x) and Yv(x) for +// arbitrary positive order (v). For negative order, use: +// +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi) +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi) +// +int bessjyv(double v,double x,double &vm,double *jv,double *yv, + double *djv,double *dyv) +{ + double v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2; + double r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq; + double b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; + int j,k,l,m,n,kz; + + x2 = x*x; + n = (int)v; + v0 = v-n; + if ((x < 0.0) || (v < 0.0)) return 1; + if (x < 1e-15) { + for (k=0;k<=n;k++) { + jv[k] = 0.0; + yv[k] = -1e308; + djv[k] = 0.0; + dyv[k] = 1e308; + if (v0 == 0.0) { + jv[0] = 1.0; + djv[1] = 0.5; + } + else djv[0] = 1e308; + } + vm = v; + return 0; + } + if (x <= 12.0) { + for (l=0;l<2;l++) { + vl = v0 + l; + bjvl = 1.0; + r = 1.0; + for (k=1;k<=40;k++) { + r *= -0.25*x2/(k*(k+vl)); + bjvl += r; + if (fabs(r) < fabs(bjvl)*1e-15) break; + } + vg = 1.0 + vl; + a = pow(0.5*x,vl)/gamma(vg); + if (l == 0) bjv0 = bjvl*a; + else bjv1 = bjvl*a; + } + } + else { + if (x >= 50.0) kz = 8; + else if (x >= 35.0) kz = 10; + else kz = 11; + for (j=0;j<2;j++) { + vv = 4.0*(j+v0)*(j+v0); + px = 1.0; + rp = 1.0; + for (k=1;k<=kz;k++) { + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))* + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2); + px += rp; + } + qx = 1.0; + rq = 1.0; + for (k=1;k<=kz;k++) { + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))* + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2); + qx += rq; + } + qx *= 0.125*(vv-1.0)/x; + xk = x-(0.5*(j+v0)+0.25)*M_PI; + a0 = sqrt(M_2_PI/x); + ck = cos(xk); + sk = sin(xk); + + if (j == 0) { + bjv0 = a0*(px*ck-qx*sk); + byv0 = a0*(px*sk+qx*ck); + } + else if (j == 1) { + bjv1 = a0*(px*ck-qx*sk); + byv1 = a0*(px*sk+qx*ck); + } + } + } + jv[0] = bjv0; + jv[1] = bjv1; + djv[0] = v0*jv[0]/x-jv[1]; + djv[1] = -(1.0+v0)*jv[1]/x+jv[0]; + if ((n >= 2) && (n <= (int)(0.9*x))) { + f0 = bjv0; + f1 = bjv1; + for (k=2;k<=n;k++) { + f = 2.0*(k+v0-1.0)*f1/x-f0; + jv[k] = f; + f0 = f1; + f1 = f; + } + } + else if (n >= 2) { + m = msta1(x,200); + if (m < n) n = m; + else m = msta2(x,n,15); + f2 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(v0+k+1.0)*f1/x-f2; + if (k <= n) jv[k] = f; + f2 = f1; + f1 = f; + } + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f; + else cs = bjv1/f2; + for (k=0;k<=n;k++) { + jv[k] *= cs; + } + } + for (k=2;k<=n;k++) { + djv[k] = -(k+v0)*jv[k]/x+jv[k-1]; + } + if (x <= 12.0) { + if (v0 != 0.0) { + for (l=0;l<2;l++) { + vl = v0 +l; + bjvl = 1.0; + r = 1.0; + for (k=1;k<=40;k++) { + r *= -0.25*x2/(k*(k-vl)); + bjvl += r; + if (fabs(r) < fabs(bjvl)*1e-15) break; + } + vg = 1.0-vl; + b = pow(2.0/x,vl)/gamma(vg); + if (l == 0) bju0 = bjvl*b; + else bju1 = bjvl*b; + } + pv0 = M_PI*v0; + pv1 = M_PI*(1.0+v0); + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0); + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1); + } + else { + ec = log(0.5*x)+el; + cs0 = 0.0; + w0 = 0.0; + r0 = 1.0; + for (k=1;k<=30;k++) { + w0 += 1.0/k; + r0 *= -0.25*x2/(k*k); + cs0 += r0*w0; + } + byv0 = M_2_PI*(ec*bjv0-cs0); + cs1 = 1.0; + w1 = 0.0; + r1 = 1.0; + for (k=1;k<=30;k++) { + w1 += 1.0/k; + r1 *= -0.25*x2/(k*(k+1)); + cs1 += r1*(2.0*w1+1.0/(k+1.0)); + } + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1); + } + } + yv[0] = byv0; + yv[1] = byv1; + for (k=2;k<=n;k++) { + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0; + yv[k] = byvk; + byv0 = byv1; + byv1 = byvk; + } + dyv[0] = v0*yv[0]/x-yv[1]; + for (k=1;k<=n;k++) { + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1]; + } + vm = n + v0; + return 0; +} + diff --git a/cbessik.cpp b/cbessik.cpp new file mode 100644 index 0000000..524c3f2 --- /dev/null +++ b/cbessik.cpp @@ -0,0 +1,454 @@ +// cbessik.cpp -- complex modified Bessel functions. +// Algorithms and coefficient values from "Computation of Special +// Functions", Zhang and Jin, John Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +#include +using namespace std; +#include "bessel.h" + +static complex cii(0.0,1.0); +static complex czero(0.0,0.0); +static complex cone(1.0,0.0); + +double gamma(double x); + +int cbessik01(complexz,complex&ci0,complex&ci1, + complex&ck0,complex&ck1,complex&ci0p, + complex&ci1p,complex&ck0p,complex&ck1p) +{ + complex z1,z2,zr,zr2,cr,ca,cb,cs,ct,cw; + double a0,w0; + int k,kz; + static double a[] = { + 0.125, + 7.03125e-2, + 7.32421875e-2, + 1.1215209960938e-1, + 2.2710800170898e-1, + 5.7250142097473e-1, + 1.7277275025845, + 6.0740420012735, + 2.4380529699556e1, + 1.1001714026925e2, + 5.5133589612202e2, + 3.0380905109224e3}; + static double b[] = { + -0.375, + -1.171875e-1, + -1.025390625e-1, + -1.4419555664063e-1, + -2.7757644653320e-1, + -6.7659258842468e-1, + -1.9935317337513, + -6.8839142681099, + -2.7248827311269e1, + -1.2159789187654e2, + -6.0384407670507e2, + -3.3022722944809e3}; + static double a1[] = { + 0.125, + 0.2109375, + 1.0986328125, + 1.1775970458984e1, + 2.1461706161499e2, + 5.9511522710323e3, + 2.3347645606175e5, + 1.2312234987631e7, + 8.401390346421e08, + 7.2031420482627e10}; + + a0 = abs(z); + z2 = z*z; + z1 = z; + if (a0 == 0.0) { + ci0 = cone; + ci1 = czero; + ck0 = complex (1e308,0); + ck1 = complex (1e308,0); + ci0p = czero; + ci1p = complex(0.5,0.0); + ck0p = complex(-1e308,0); + ck1p = complex(-1e308,0); + return 0; + } + if (real(z) < 0.0) z1 = -z; + if (a0 <= 18.0) { + ci0 = cone; + cr = cone; + for (k=1;k<=50;k++) { + cr *= 0.25*z2/(double)(k*k); + ci0 += cr; + if (abs(cr/ci0) < eps) break; + } + ci1 = cone; + cr = cone; + for (k=1;k<=50;k++) { + cr *= 0.25*z2/(double)(k*(k+1.0)); + ci1 += cr; + if (abs(cr/ci1) < eps) break; + } + ci1 *= 0.5*z1; + } + else { + if (a0 >= 50.0) kz = 7; + else if (a0 >= 35.0) kz = 9; + else kz = 12; + ca = exp(z1)/sqrt(2.0*M_PI*z1); + ci0 = cone; + zr = 1.0/z1; + for (k=0;k 0.0) { + ck0 -= cii*M_PI*ci0; + ck1 = -ck1-cii*M_PI*ci1; + } + ci1 = -ci1; + } + ci0p = ci1; + ci1p = ci0-1.0*ci1/z; + ck0p = -ck1; + ck1p = -ck0-1.0*ck1/z; + return 0; +} +int cbessikna(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp) +{ + complex ci0,ci1,ck0,ck1,ckk,cf,cf1,cf2,cs; + double a0; + int k,m,ecode; + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + ci[k] = czero; + ck[k] = complex(-1e308,0); + cip[k] = czero; + ckp[k] = complex(1e308,0); + } + ci[0] = cone; + cip[1] = complex(0.5,0.0); + return 0; + } + ecode = cbessik01(z,ci[0],ci[1],ck[0],ck[1],cip[0],cip[1],ckp[0],ckp[1]); + if (n < 2) return 0; + ci0 = ci[0]; + ci1 = ci[1]; + ck0 = ck[0]; + ck1 = ck[1]; + m = msta1(a0,200); + if (m < n) nm = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z+cf2; + if (k <= nm) ci[k] = cf; + cf2 = cf1; + cf1 = cf; + } + cs = ci0/cf; + for (k=0;k<=nm;k++) { + ci[k] *= cs; + } + for (k=2;k<=nm;k++) { + if (abs(ci[k-1]) > abs(ci[k-2])) { + ckk = (1.0/z-ci[k]*ck[k-1])/ci[k-1]; + } + else { + ckk = (ci[k]*ck[k-2]+2.0*(k-1.0)/(z*z))/ci[k-2]; + } + ck[k] = ckk; + } + for (k=2;k<=nm;k++) { + cip[k] = ci[k-1]-(double)k*ci[k]/z; + ckp[k] = -ck[k-1]-(double)k*ck[k]/z; + } + return 0; +} +int cbessiknb(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp) +{ + complex z1,cbs,csk0,cf,cf0,cf1,ca0,cbkl; + complex cg,cg0,cg1,cs0,cs,cr; + double a0,vt,fac; + int k,kz,l,m; + + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + ci[k] = czero; + ck[k] = complex(1e308,0); + cip[k] = czero; + ckp[k] = complex(-1e308,0); + } + ci[0] = complex(1.0,0.0); + cip[1] = complex(0.5,0.0); + return 0; + } + z1 = z; + if (real(z) < 0.0) z1 = -z; + if (n == 0) nm = 1; + m = msta1(a0,200); + if (m < nm) nm = m; + else m = msta2(a0,nm,15); + cbs = czero; + csk0 = czero; + cf0 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z1+cf0; + if (k <=nm) ci[k] = cf; + if ((k != 0) && (k == 2*(k>>1))) csk0 += 4.0*cf/(double)k; + cbs += 2.0*cf; + cf0 = cf1; + cf1 = cf; + } + cs0 = exp(z1)/(cbs-cf); + for (k=0;k<=nm;k++) { + ci[k] *= cs0; + } + if (a0 <= 9.0) { + ck[0] = -(log(0.5*z1)+el)*ci[0]+cs0*csk0; + ck[1] = (1.0/z1-ci[1]*ck[0])/ci[0]; + } + else { + ca0 = sqrt(M_PI_2/z1)*exp(-z1); + if (a0 >= 200.0) kz = 6; + else if (a0 >= 80.0) kz = 8; + else if (a0 >= 25.0) kz = 10; + else kz = 16; + for (l=0;l<2;l++) { + cbkl = cone; + vt = 4.0*l; + cr = cone; + for (k=1;k<=kz;k++) { + cr *= 0.125*(vt-pow(2.0*k-1.0,2.0))/((double)k*z1); + cbkl += cr; + } + ck[l] = ca0*cbkl; + } + } + cg0 = ck[0]; + cg1 = ck[1]; + for (k=2;k<=nm;k++) { + cg = 2.0*(k-1.0)*cg1/z1+cg0; + ck[k] = cg; + cg0 = cg1; + cg1 = cg; + } + if (real(z) < 0.0) { + fac = 1.0; + for (k=0;k<=nm;k++) { + if (imag(z) < 0.0) { + ck[k] = fac*ck[k]+cii*M_PI*ci[k]; + } + else { + ck[k] = fac*ck[k]-cii*M_PI*ci[k]; + } + ci[k] *= fac; + fac = -fac; + } + } + cip[0] = ci[1]; + ckp[0] = -ck[1]; + for (k=1;k<=nm;k++) { + cip[k] = ci[k-1]-(double)k*ci[k]/z; + ckp[k] = -ck[k-1]-(double)k*ck[k]/z; + } + return 0; +} +int cbessikv(double v,complexz,double &vm,complex *civ, + complex *ckv,complex *civp,complex *ckvp) +{ + complex z1,z2,ca1,ca,cs,cr,ci0,cbi0,cf,cf1,cf2; + complex ct,cp,cbk0,ca2,cr1,cr2,csu,cws,cb; + complex cg0,cg1,cgk,cbk1,cvk; + double a0,v0,v0p,v0n,vt,w0,piv,gap,gan; + int m,n,k,kz; + + a0 = abs(z); + z1 = z; + z2 = z*z; + n = (int)v; + v0 = v-n; + piv = M_PI*v0; + vt = 4.0*v0*v0; + if (n == 0) n = 1; + if (a0 < 1e-100) { + for (k=0;k<=n;k++) { + civ[k] = czero; + ckv[k] = complex(-1e308,0); + civp[k] = czero; + ckvp[k] = complex(1e308,0); + } + if (v0 == 0.0) { + civ[0] = cone; + civp[1] = complex (0.5,0.0); + } + vm = v; + return 0; + } + if (a0 >= 50.0) kz = 8; + else if (a0 >= 35.0) kz = 10; + else kz = 14; + if (real(z) <= 0.0) z1 = -z; + if (a0 < 18.0) { + if (v0 == 0.0) { + ca1 = cone; + } + else { + v0p = 1.0+v0; + gap = gamma(v0p); + ca1 = pow(0.5*z1,v0)/gap; + } + ci0 = cone; + cr = cone; + for (k=1;k<=50;k++) { + cr *= 0.25*z2/(k*(k+v0)); + ci0 += cr; + if (abs(cr/ci0) < eps) break; + } + cbi0 = ci0*ca1; + } + else { + ca = exp(z1)/sqrt(2.0*M_PI*z1); + cs = cone; + cr = cone; + for (k=1;k<=kz;k++) { + cr *= -0.125*(vt-pow(2.0*k-1.0,2.0))/((double)k*z1); + cs += cr; + } + cbi0 = ca*cs; + } + m = msta1(a0,200); + if (m < n) n = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(v0+k+1.0)*cf1/z1+cf2; + if (k <= n) civ[k] = cf; + cf2 = cf1; + cf1 = cf; + } + cs = cbi0/cf; + for (k=0;k<=n;k++) { + civ[k] *= cs; + } + if (a0 <= 9.0) { + if (v0 == 0.0) { + ct = -log(0.5*z1)-el; + cs = czero; + w0 = 0.0; + cr = cone; + for (k=1;k<=50;k++) { + w0 += 1.0/k; + cr *= 0.25*z2/(double)(k*k); + cp = cr*(w0+ct); + cs += cp; + if ((k >= 10) && (abs(cp/cs) < eps)) break; + } + cbk0 = ct+cs; + } + else { + v0n = 1.0-v0; + gan = gamma(v0n); + ca2 = 1.0/(gan*pow(0.5*z1,v0)); + ca1 = pow(0.5*z1,v0)/gap; + csu = ca2-ca1; + cr1 = cone; + cr2 = cone; + cws = czero; + for (k=1;k<=50;k++) { + cr1 *= 0.25*z2/(k*(k-v0)); + cr2 *= 0.25*z2/(k*(k+v0)); + csu += ca2*cr1-ca1*cr2; + if ((k >= 10) && (abs((cws-csu)/csu) < eps)) break; + cws = csu; + } + cbk0 = csu*M_PI_2/sin(piv); + } + } + else { + cb = exp(-z1)*sqrt(M_PI_2/z1); + cs = cone; + cr = cone; + for (k=1;k<=kz;k++) { + cr *= 0.125*(vt-pow(2.0*k-1.0,2.0))/((double)k*z1); + cs += cr; + } + cbk0 = cb*cs; + } + cbk1 = (1.0/z1-civ[1]*cbk0)/civ[0]; + ckv[0] = cbk0; + ckv[1] = cbk1; + cg0 = cbk0; + cg1 = cbk1; + for (k=2;k<=n;k++) { + cgk = 2.0*(v0+k-1.0)*cg1/z1+cg0; + ckv[k] = cgk; + cg0 = cg1; + cg1 = cgk; + } + if (real(z) < 0.0) { + for (k=0;k<=n;k++) { + cvk = exp((k+v0)*M_PI*cii); + if (imag(z) < 0.0) { + ckv[k] = cvk*ckv[k]+M_PI*cii*civ[k]; + civ[k] /= cvk; + } + else if (imag(z) > 0.0) { + ckv[k] = ckv[k]/cvk-M_PI*cii*civ[k]; + civ[k] *= cvk; + } + } + } + civp[0] = v0*civ[0]/z+civ[1]; + ckvp[0] = v0*ckv[0]/z-ckv[1]; + for (k=1;k<=n;k++) { + civp[k] = -(k+v0)*civ[k]/z+civ[k-1]; + ckvp[k] = -(k+v0)*ckv[k]/z-ckv[k-1]; + } + vm = n+v0; + return 0; +} diff --git a/cbessjy.cpp b/cbessjy.cpp new file mode 100644 index 0000000..517dc77 --- /dev/null +++ b/cbessjy.cpp @@ -0,0 +1,739 @@ +// cbessjy.cpp -- complex Bessel functions. +// Algorithms and coefficient values from "Computation of Special +// Functions", Zhang and Jin, John Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +#include +using namespace std; +#include "bessel.h" +double gamma(double); + +static complex cii(0.0,1.0); +static complex cone(1.0,0.0); +static complex czero(0.0,0.0); + +#define PI 3.14159 + +int cbessjy01(complex z,complex &cj0,complex &cj1, + complex &cy0,complex &cy1,complex &cj0p, + complex &cj1p,complex &cy0p,complex &cy1p) +{ + complex z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu; + double a0,w0,w1; + int k,kz; + + static double a[] = { + -7.03125e-2, + 0.112152099609375, + -0.5725014209747314, + 6.074042001273483, + -1.100171402692467e2, + 3.038090510922384e3, + -1.188384262567832e5, + 6.252951493434797e6, + -4.259392165047669e8, + 3.646840080706556e10, + -3.833534661393944e12, + 4.854014686852901e14, + -7.286857349377656e16, + 1.279721941975975e19}; + static double b[] = { + 7.32421875e-2, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1, + 5.513358961220206e2, + -1.825775547429318e4, + 8.328593040162893e5, + -5.006958953198893e7, + 3.836255180230433e9, + -3.649010818849833e11, + 4.218971570284096e13, + -5.827244631566907e15, + 9.476288099260110e17, + -1.792162323051699e20}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947, + 1.215978918765359e2, + -3.302272294480852e3, + 1.276412726461746e5, + -6.656367718817688e6, + 4.502786003050393e8, + -3.833857520742790e10, + 4.011838599133198e12, + -5.060568503314727e14, + 7.572616461117958e16, + -1.326257285320556e19}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1, + -6.038440767050702e2, + 1.971837591223663e4, + -8.902978767070678e5, + 5.310411010968522e7, + -4.043620325107754e9, + 3.827011346598605e11, + -4.406481417852278e13, + 6.065091351222699e15, + -9.833883876590679e17, + 1.855045211579828e20}; + + a0 = abs(z); + z2 = z*z; + z1 = z; + if (a0 == 0.0) { + cj0 = cone; + cj1 = czero; + cy0 = complex(-1e308,0); + cy1 = complex(-1e308,0); + cj0p = czero; + cj1p = complex(0.5,0.0); + cy0p = complex(1e308,0); + cy1p = complex(1e308,0); + return 0; + } + if (real(z) < 0.0) z1 = -z; + if (a0 <= 12.0) { + cj0 = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(double)(k*k); + cj0 += cr; + if (abs(cr) < abs(cj0)*eps) break; + } + cj1 = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(k*(k+1.0)); + cj1 += cr; + if (abs(cr) < abs(cj1)*eps) break; + } + cj1 *= 0.5*z1; + w0 = 0.0; + cr = cone; + cs = czero; + for (k=1;k<=40;k++) { + w0 += 1.0/k; + cr *= -0.25*z2/(double)(k*k); + cp = cr*w0; + cs += cp; + if (abs(cp) < abs(cs)*eps) break; + } + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs); + w1 = 0.0; + cr = cone; + cs = cone; + for (k=1;k<=40;k++) { + w1 += 1.0/k; + cr *= -0.25*z2/(k*(k+1.0)); + cp = cr*(2.0*w1+1.0/(k+1.0)); + cs += cp; + if (abs(cp) < abs(cs)*eps) break; + } + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs); + } + else { + if (a0 >= 50.0) kz = 8; // can be changed to 10 + else if (a0 >= 35.0) kz = 10; // " " " 12 + else kz = 12; // " " " 14 + ct1 = z1 - M_PI_4; + cp0 = cone; + for (k=0;k 0.0) { + cy0 += 2.0*cii*cj0; + cy1 = -(cy1+2.0*cii*cj1); + } + cj1 = -cj1; + } + cj0p = -cj1; + cj1p = cj0-cj1/z; + cy0p = -cy1; + cy1p = cy0-cy1/z; + return 0; +} + +int cbessjyna(int n,complex z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp) +{ + complex cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2; + complex cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22; + complex ch0,ch1,ch2; + double a0,yak,ya1,ya0,wa; + int m,k,lb,lb0; + + if (n < 0) return 1; + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + cj[k] = czero; + cy[k] = complex (-1e308,0); + cjp[k] = czero; + cyp[k] = complex(1e308,0); + } + cj[0] = cone; + cjp[1] = complex(0.5,0.0); + return 0; + } + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]); + cbj0 = cj[0]; + cbj1 = cj[1]; + cby0 = cy[0]; + cby1 = cy[1]; + if (n <= 1) return 0; + if (n < (int)0.25*a0) { + cj0 = cbj0; + cj1 = cbj1; + for (k=2;k<=n;k++) { + cjk = 2.0*(k-1.0)*cj1/z-cj0; + cj[k] = cjk; + cj0 = cj1; + cj1 = cjk; + } + } + else { + m = msta1(a0,200); + if (m < n) nm = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex (1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z-cf2; + if (k <=nm) cj[k] = cf; + cf2 = cf1; + cf1 = cf; + } + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf; + else cs = cbj1/cf2; + for (k=0;k<=nm;k++) { + cj[k] *= cs; + } + } + for (k=2;k<=nm;k++) { + cjp[k] = cj[k-1]-(double)k*cj[k]/z; + } + ya0 = abs(cby0); + lb = 0; + cg0 = cby0; + cg1 = cby1; + for (k=2;k<=nm;k++) { + cyk = 2.0*(k-1.0)*cg1/z-cg0; + yak = abs(cyk); + ya1 = abs(cg0); + if ((yak < ya0) && (yak < ya1)) lb = k; + cy[k] = cyk; + cg0 = cg1; + cg1 = cyk; + } + lb0 = 0; + if ((lb > 4) && (imag(z) != 0.0)) { + while (lb != lb0) { + ch2 = cone; + ch1 = czero; + lb0 = lb; + for (k=lb;k>=1;k--) { + ch0 = 2.0*k*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp12 = ch0; + cp22 = ch2; + ch2 = czero; + ch1 = cone; + for (k=lb;k>=1;k--) { + ch0 = 2.0*k*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp11 = ch0; + cp21 = ch2; + if (lb == nm) + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1]; + if (abs(cj[0]) > abs(cj[1])) { + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0]; + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0]; + } + else { + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1]; + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1]; + } + cyl2 = cy[lb+1]; + cyl1 = cy[lb]; + for (k=lb-1;k>=0;k--) { + cylk = 2.0*(k+1.0)*cyl1/z-cyl2; + cy[k] = cylk; + cyl2 = cyl1; + cyl1 = cylk; + } + cyl1 = cy[lb]; + cyl2 = cy[lb+1]; + for (k=lb+1;k z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp) +{ + complex cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce; + complex ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1; + complex cyy,cbjk,ct2; + double a0,y0; + int k,m; + static double a[] = { + -0.7031250000000000e-1, + 0.1121520996093750, + -0.5725014209747314, + 6.074042001273483}; + static double b[] = { + 0.7324218750000000e-1, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1}; + + y0 = abs(imag(z)); + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + cj[k] = czero; + cy[k] = complex (-1e308,0); + cjp[k] = czero; + cyp[k] = complex(1e308,0); + } + cj[0] = cone; + cjp[1] = complex(0.5,0.0); + return 0; + } + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) { + if (n == 0) nm = 1; + m = msta1(a0,200); + if (m < nm) nm = m; + else m = msta2(a0,nm,15); + cbs = czero; + csu = czero; + csv = czero; + cf2 = czero; + cf1 = complex (1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z-cf2; + if (k <= nm) cj[k] = cf; + if (((k & 1) == 0) && (k != 0)) { + if (y0 <= 1.0) { + cbs += 2.0*cf; + } + else { + cbs += (-1)*((k & 2)-1)*2.0*cf; + } + csu += (double)((-1)*((k & 2)-1))*cf/(double)k; + } + else if (k > 1) { + csv += (double)((-1)*((k & 2)-1)*k)*cf/(double)(k*k-1.0); + } + cf2 = cf1; + cf1 = cf; + } + if (y0 <= 1.0) cs0 = cbs+cf; + else cs0 = (cbs+cf)/cos(z); + for (k=0;k<=nm;k++) { + cj[k] /= cs0; + } + ce = log(0.5*z)+el; + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0); + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0); + } + else { + ct1 = z-M_PI_4; + cp0 = cone; + for (k=0;k<4;k++) { + cp0 += a[k]*pow(z,-2.0*k-2.0); + } + cq0 = -0.125/z; + for (k=0;k<4;k++) { + cq0 += b[k] *pow(z,-2.0*k-3.0); + } + cu = sqrt(M_2_PI/z); + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); + cj[0] = cbj0; + cy[0] = cby0; + ct2 = z-0.75*M_PI; + cp1 = cone; + for (k=0;k<4;k++) { + cp1 += a1[k]*pow(z,-2.0*k-2.0); + } + cq1 = 0.375/z; + for (k=0;k<4;k++) { + cq1 += b1[k]*pow(z,-2.0*k-3.0); + } + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); + cj[1] = cbj1; + cy[1] = cby1; + for (k=2;k<=n;k++) { + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0; + cj[k] = cbjk; + cbj0 = cbj1; + cbj1 = cbjk; + } + } + cjp[0] = -cj[1]; + for (k=1;k<=nm;k++) { + cjp[k] = cj[k-1]-(double)k*cj[k]/z; + } + if (abs(cj[0]) > 1.0) + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0]; + for (k=2;k<=nm;k++) { + if (abs(cj[k-1]) >= abs(cj[k-2])) + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1]; + else + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2]; + cy[k] = cyy; + } + cyp[0] = -cy[1]; + for (k=1;k<=nm;k++) { + cyp[k] = cy[k-1]-(double)k*cy[k]/z; + } + + return 0; +} + +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp) +{ + complex z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp; + complex cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb; + complex cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2; + complex cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22; + complex ch0,ch1,ch2,cyl1,cyl2,cylk; + + double a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; + int j,n,k,kz,l,lb,lb0,m; + + a0 = abs(z); + z1 = z; + z2 = z*z; + n = (int)v; + + + v0 = v-n; + + pv0 = M_PI*v0; + pv1 = M_PI*(1.0+v0); + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + cjv[k] = czero; + cyv[k] = complex (-1e308,0); + cjvp[k] = czero; + cyvp[k] = complex (1e308,0); + + } + if (v0 == 0.0) { + cjv[0] = cone; + cjvp[1] = complex (0.5,0.0); + } + else { + cjvp[0] = complex (1e308,0); + } + vm = v; + return 0; + } + if (real(z1) < 0.0) z1 = -z; + if (a0 <= 12.0) { + for (l=0;l<2;l++) { + vl = v0+l; + cjvl = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(k*(k+vl)); + cjvl += cr; + if (abs(cr) < abs(cjvl)*eps) break; + } + vg = 1.0 + vl; + ga = gamma(vg); + ca = pow(0.5*z1,vl)/ga; + if (l == 0) cjv0 = cjvl*ca; + else cjv1 = cjvl*ca; + } + } + else { + if (a0 >= 50.0) kz = 8; + else if (a0 >= 35.0) kz = 10; + else kz = 11; + for (j=0;j<2;j++) { + vv = 4.0*(j+v0)*(j+v0); + cpz = cone; + crp = cone; + for (k=1;k<=kz;k++) { + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))* + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2); + cpz += crp; + } + cqz = cone; + crq = cone; + for (k=1;k<=kz;k++) { + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))* + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2); + cqz += crq; + } + cqz *= 0.125*(vv-1.0)/z1; + zk = z1-(0.5*(j+v0)+0.25)*M_PI; + ca0 = sqrt(M_2_PI/z1); + cck = cos(zk); + csk = sin(zk); + if (j == 0) { + cjv0 = ca0*(cpz*cck-cqz*csk); + cyv0 = ca0*(cpz*csk+cqz+cck); + } + else { + cjv1 = ca0*(cpz*cck-cqz*csk); + cyv1 = ca0*(cpz*csk+cqz*cck); + } + } + } + if (a0 <= 12.0) { + if (v0 != 0.0) { + for (l=0;l<2;l++) { + vl = v0+l; + cjvl = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(k*(k-vl)); + cjvl += cr; + if (abs(cr) < abs(cjvl)*eps) break; + } + vg = 1.0-vl; + gb = gamma(vg); + cb = pow(2.0/z1,vl)/gb; + if (l == 0) cju0 = cjvl*cb; + else cju1 = cjvl*cb; + } + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0); + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1); + } + else { + cec = log(0.5*z1)+el; + cs0 = czero; + w0 = 0.0; + cr0 = cone; + for (k=1;k<=30;k++) { + w0 += 1.0/k; + cr0 *= -0.25*z2/(double)(k*k); + cs0 += cr0*w0; + } + cyv0 = M_2_PI*(cec*cjv0-cs0); + cs1 = cone; + w1 = 0.0; + cr1 = cone; + for (k=1;k<=30;k++) { + w1 += 1.0/k; + cr1 *= -0.25*z2/(k*(k+1.0)); + cs1 += cr1*(2.0*w1+1.0/(k+1.0)); + } + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); + } + } + if (real(z) < 0.0) { + cfac0 = exp(pv0*cii); + cfac1 = exp(pv1*cii); + if (imag(z) < 0.0) { + cyv0 = cfac0*cyv0-2.0*cii*cos(pv0)*cjv0; + cyv1 = cfac1*cyv1-2.0*cii*cos(pv1)*cjv1; + cjv0 /= cfac0; + cjv1 /= cfac1; + } + else if (imag(z) > 0.0) { + cyv0 = cyv0/cfac0+2.0*cii*cos(pv0)*cjv0; + cyv1 = cyv1/cfac1+2.0*cii*cos(pv1)*cjv1; + cjv0 *= cfac0; + cjv1 *= cfac1; + } + } + cjv[0] = cjv0; + cjv[1] = cjv1; + if ((n >= 2) && (n <= (int)(0.25*a0))) { + cf0 = cjv0; + cf1 = cjv1; + for (k=2;k<= n;k++) { + cf = 2.0*(k+v0-1.0)*cf1/z-cf0; + cjv[k] = cf; + cf0 = cf1; + cf1 = cf; + } + } + else if (n >= 2) { + m = msta1(a0,200); + if (m < n) n = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(v0+k+1.0)*cf1/z-cf2; + if (k <= n) cjv[k] = cf; + cf2 = cf1; + cf1 = cf; + } + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; + else cs = cjv1/cf2; + for (k=0;k<=n;k++) { + cjv[k] *= cs; + } + } + cjvp[0] = v0*cjv[0]/z-cjv[1]; + for (k=1;k<=n;k++) { + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1]; + } + cyv[0] = cyv0; + cyv[1] = cyv1; + ya0 = abs(cyv0); + lb = 0; + cg0 = cyv0; + cg1 = cyv1; + for (k=2;k<=n;k++) { + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; + yak = abs(cyk); + ya1 = abs(cg0); + if ((yak < ya0) && (yak< ya1)) lb = k; + cyv[k] = cyk; + cg0 = cg1; + cg1 = cyk; + } + lb0 = 0; + if ((lb > 4) && (imag(z) != 0.0)) { + while(lb != lb0) { + ch2 = cone; + ch1 = czero; + lb0 = lb; + for (k=lb;k>=1;k--) { + ch0 = 2.0*(k+v0)*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp12 = ch0; + cp22 = ch2; + ch2 = czero; + ch1 = cone; + for (k=lb;k>=1;k--) { + ch0 = 2.0*(k+v0)*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp11 = ch0; + cp21 = ch2; + if (lb == n) + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; + if (abs(cjv[0]) > abs(cjv[1])) { + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; + } + else { + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1]; + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1]; + } + cyl2 = cyv[lb+1]; + cyl1 = cyv[lb]; + for (k=lb-1;k>=0;k--) { + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2; + cyv[k] = cylk; + cyl2 = cyl1; + cyl1 = cylk; + } + cyl1 = cyv[lb]; + cyl2 = cyv[lb+1]; + for (k=lb+1;k z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp) +{ + //first, compute the bessel functions of fractional order + cbessjyva(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); + + //iterate through each and scale + for(int n = 0; n<=v; n++) + { + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); + + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); + } + + return 0; + +} + + + diff --git a/colormap.h b/colormap.h new file mode 100644 index 0000000..cc68f32 --- /dev/null +++ b/colormap.h @@ -0,0 +1,229 @@ +#ifndef RTS_COLORMAP_H +#define RTS_COLORMAP_H + +#include +#include +#include +#include "rts/cuda_handle_error.h" + + +#define BREWER_CTRL_PTS 11 + +#ifdef __CUDACC__ +texture cudaTexBrewer; +static cudaArray* gpuBrewer; +#endif + + + +namespace rts{ + namespace colormap{ + +enum colormapType {cmBrewer, cmGrayscale}; + +static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) +{ + //create an image object + QImage image(x_size, y_size, QImage::Format_RGB32); + + int i; + unsigned char r, g, b; + unsigned int x, y; + for(y=0; y +__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + //compute the normalized value on [minVal maxVal] + float a = (gpuSource[i] - minVal) / (maxVal - minVal); + + //lookup the color + float shift = 1.0/BREWER_CTRL_PTS; + float4 color = tex1D(cudaTexBrewer, a+shift); + + gpuDest[i * 3 + 0] = 255 * color.x; + gpuDest[i * 3 + 1] = 255 * color.y; + gpuDest[i * 3 + 2] = 255 * color.z; +} + +template +__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + //compute the normalized value on [minVal maxVal] + float a = (gpuSource[i] - minVal) / (maxVal - minVal); + + gpuDest[i * 3 + 0] = 255 * a; + gpuDest[i * 3 + 1] = 255 * a; + gpuDest[i * 3 + 2] = 255 * a; +} + +template +static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) +{ + //This function converts a scalar field on the GPU to a color image on the GPU + int gridDim = (nVals + blockDim - 1)/blockDim; + if(cm == cmGrayscale) + applyGrayscale<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); + else if(cm == cmBrewer) + { + initBrewer(); + applyBrewer<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); + destroyBrewer(); + } + +} + +template +static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) +{ + //this function converts a scalar field on the GPU to a color image on the CPU + + //first create the color image on the GPU + + //allocate GPU memory for the color image + unsigned char* gpuDest; + HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); + + //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); + + //create the image on the gpu + gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); + + //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); + + //copy the image from the GPU to the CPU + HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); + + HANDLE_ERROR(cudaFree( gpuDest )); + +} + +template +static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) +{ + //allocate a color buffer + unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); + + //do the mapping + gpu2cpu(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); + + //copy the buffer to an image + buffer2image(cpuBuffer, fileDest, x_size, y_size); + + free(cpuBuffer); +} + +#endif + +template +static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) +{ + int i; + float a; + float range = valMax - valMin; + for(i = 0; i +static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) +{ + //allocate a color buffer + unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); + + //do the mapping + cpu2cpu(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); + + //copy the buffer to an image + buffer2image(cpuBuffer, fileDest, x_size, y_size); + + free(cpuBuffer); + +} + +}} //end namespace colormap and rts + +#endif + diff --git a/dataTypes.h b/dataTypes.h new file mode 100644 index 0000000..664366b --- /dev/null +++ b/dataTypes.h @@ -0,0 +1,39 @@ +#ifndef DATATYPES_H +#define DATATYPES_H + +#include + +#define PRECISION_SINGLE + +#ifdef PRECISION_SINGLE +typedef float ptype; +#elif defined PRECISION_DOUBLE +typedef double ptype; +#endif + +#define BLOCK 256 +#define SQRT_BLOCK 16 + +#define PI 3.14159 + +//a very small number +#define EPSILON 0.00001 + +//CUDA hybrid code - complex class should run on both the CPU and GPU + + +typedef ptype fieldPoint; + +//hybrid GPU/CPU complex data typ +#include "rts/rtsComplex.h" +#include "rts/rtsVector.h" +#include "rts/rtsPoint.h" +#include "rts/rtsQuad.h" + +typedef rts::rtsComplex bsComplex; +typedef rts::rtsVector bsVector; +typedef rts::rtsPoint bsPoint; +typedef rts::rtsQuad bsRect; + + +#endif diff --git a/defaults.h b/defaults.h new file mode 100644 index 0000000..d2bc201 --- /dev/null +++ b/defaults.h @@ -0,0 +1,86 @@ +#ifndef BIMSIM_DEFAULTS +#define BIMSIM_DEFAULTS + +//default sphere parameters +#define DEFAULT_SPHERE_X 0 +#define DEFAULT_SPHERE_Y 0 +#define DEFAULT_SPHERE_Z 0 +#define DEFAULT_SPHERE_A 1 + +//default near field parameters +#define DEFAULT_LAMBDA 1 +#define DEFAULT_AMPLITUDE 1 +#define DEFAULT_N 1.4 +#define DEFAULT_K 0.5 +#define DEFAULT_FOCUS_X 0 +#define DEFAULT_FOCUS_Y 0 +#define DEFAULT_FOCUS_Z 0 +#define DEFAULT_INCIDENT_ORDER 100 +#define DEFAULT_STABILITY_PARM 1.4 + +//optics +#define DEFAULT_CONDENSER_MIN 0.0 +#define DEFAULT_CONDENSER_MAX 1 + +#define DEFAULT_OBJECTIVE_MIN 0.0 +#define DEFAULT_OBJECTIVE_MAX 1 + +//incident light direction +#define DEFAULT_K_THETA 0 +#define DEFAULT_K_PHI 0 + +//default flags +#define DEFAULT_PLANEWAVE false +#define DEFAULT_VECTORSIM false +//#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective + + +#define DEFAULT_SLICE_MIN_X -5 +#define DEFAULT_SLICE_MIN_Y 0 +#define DEFAULT_SLICE_MIN_Z -5 + +#define DEFAULT_SLICE_MAX_X 5 +#define DEFAULT_SLICE_MAX_Y 0 +#define DEFAULT_SLICE_MAX_Z 5 + +#define DEFAULT_SLICE_NORM_X 0 +#define DEFAULT_SLICE_NORM_Y 1 +#define DEFAULT_SLICE_NORM_Z 0 + + +/* +#define DEFAULT_SLICE_MIN_X -6 +#define DEFAULT_SLICE_MIN_Y -6 +#define DEFAULT_SLICE_MIN_Z 1 + +#define DEFAULT_SLICE_MAX_X 6 +#define DEFAULT_SLICE_MAX_Y 6 +#define DEFAULT_SLICE_MAX_Z 1 + +#define DEFAULT_SLICE_NORM_X 0 +#define DEFAULT_SLICE_NORM_Y 0 +#define DEFAULT_SLICE_NORM_Z 1 +*/ + + +#define DEFAULT_FIELD_ORDER 200 + +#define DEFAULT_SAMPLES 200 + +#define DEFAULT_SLICE_RES 256 + +#define DEFAULT_PADDING 1 +#define DEFAULT_SUPERSAMPLE 1 + +#define DEFAULT_INTENSITY_FILE "out_di.bmp" +#define DEFAULT_TRANSMITTANCE_FILE "" +#define DEFAULT_ABSORBANCE_FILE "" +#define DEFAULT_NEAR_FILE "out_n.bmp" +#define DEFAULT_FAR_FILE "out_f.bmp" +#define DEFAULT_EXTENDED_SOURCE "" +#define DEFAULT_FIELD_TYPE "magnitude" +#define DEFAULT_FORMAT fileoutStruct::formatImage +#define DEFAULT_COLORMAP "brewer" + + +#endif diff --git a/fieldslice.cpp b/fieldslice.cpp new file mode 100644 index 0000000..5df3b55 --- /dev/null +++ b/fieldslice.cpp @@ -0,0 +1,89 @@ +#include "fieldslice.h" +#include "dataTypes.h" + +#include "cufft.h" + +#include +#include +using namespace std; + +fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) +{ + //save the slice resolution + R[0] = x_size; + R[1] = x_size; + + scalarField = true; + + //init_gpu(); + + +} + +void fieldslice::toAngularSpectrum() +{ + cufftHandle plan; + + //create a CUFFT plan handle + if(cufftPlan2d(&plan, R[0], R[1], CUFFT_C2C) != CUFFT_SUCCESS) + { + cout<<"Error creating CUFFT plan for computing the angular spectrum."<= N) return; + + ptype xm = x[i].abs(); + + + if(y != NULL && z != NULL) + { + ptype ym = y[i].abs(); + ptype zm = z[i].abs(); + I[i] = xm*xm + ym*ym + zm*zm; + } + else + { + I[i] = xm*xm; + } +} + +__global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* D, int uR, int vR, int ss) +{ + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index into the detector + int i = iv*uR + iu; + + //compute the index into the field + int fi; + + //initialize the intensity for the pixel to zero + ptype I = 0; + ptype xm = 0; + ptype ym = 0; + ptype zm = 0; + + int ix, iy; + for(ix = 0; ix= N) return; + + V[i] = field_component[i].real(); +} + +__global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + V[i] = field_component[i].imag(); +} + +__global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + output[i] = sqrt(input[i]); + +} + + +__global__ void field_scale(bsComplex* x, bsComplex* y, bsComplex* z, unsigned int N, ptype v) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + if(x != NULL) + x[i] *= v; + if(y != NULL) + y[i] *= v; + if(z != NULL) + z[i] *= v; +} + + +scalarslice fieldslice::Mag() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); + field_sqrt<<>>(result->S, result->S, N); + + return *result; +} + +scalarslice fieldslice::Real() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_real<<>>(x_hat, result->S, N); + + return *result; +} + +scalarslice fieldslice::Imag() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_imaginary<<>>(x_hat, result->S, N); + + return *result; +} + +void fieldslice::IntegrateAndResample(scalarslice* detector, unsigned int supersample) +{ + //compute the intensity and resample at the detector resolution + unsigned int D[2]; + D[0] = detector->R[0]; + D[1] = detector->R[1]; + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((D[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (D[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + resample_intensity<<>>(x_hat, y_hat, z_hat, detector->S, D[0], D[1], supersample); +} + +scalarslice fieldslice::Intensity() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); + + return *result; +} + +void fieldslice::ScaleField(ptype v) +{ + //This function scales the field by some constant value v + //This is mostly used for the inverse FFT, which has to divide the field by R^2 + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + //cout<<"Size of mag field: "<>>(x_hat, y_hat, z_hat, N, v); + +} + +void fieldslice::init_gpu() +{ + //allocate space on the GPU for the field slice + HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); + //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); + + //if the field is scalar, y_hat and z_hat are unused + if(scalarField) + { + y_hat = NULL; + z_hat = NULL; + + } + else + { + HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); + //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); + + HANDLE_ERROR(cudaMalloc((void**)&z_hat, R[0] * R[1] * sizeof(bsComplex))); + //HANDLE_ERROR(cudaMemset(z_hat, 0, R[0] * R[1] * sizeof(bsComplex))); + } + + clear_gpu(); +} + +void fieldslice::kill_gpu() +{ + if(x_hat != NULL) + HANDLE_ERROR(cudaFree(x_hat)); + if(y_hat != NULL) + HANDLE_ERROR(cudaFree(y_hat)); + if(z_hat != NULL) + HANDLE_ERROR(cudaFree(z_hat)); + +} + +void fieldslice::clear_gpu() +{ + int memsize = R[0] * R[1] * sizeof(bsComplex); + if(x_hat != NULL) + HANDLE_ERROR(cudaMemset(x_hat, 0, memsize)); + if(y_hat != NULL) + HANDLE_ERROR(cudaMemset(y_hat, 0, memsize)); + if(z_hat != NULL) + HANDLE_ERROR(cudaMemset(z_hat, 0, memsize)); + +} + +__global__ void copy_crop(bsComplex* source, bsComplex* dest, int u, int v, int su, int sv, int uR, int vR) +{ + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= su || iv >= sv) return; + + //compute the destination index + int i = iv*su + iu; + + //compute the source index + int sourceV = v + iv; + int sourceU = u + iu; + int is = sourceV * uR + sourceU; + + dest[i] = source[is]; + +} + +fieldslice fieldslice::crop(int u, int v, int su, int sv) +{ + //create a new field slice with the appropriate settings + fieldslice result(su, sv); + result.scalarField = scalarField; + + //allocate space for the new field + result.init_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((su + SQRT_BLOCK -1)/SQRT_BLOCK, (sv + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //call a kernel to copy the cropped to the new field slice + if(x_hat != NULL) + copy_crop<<>>(x_hat, result.x_hat, u, v, su, sv, R[0], R[1]); + if(y_hat != NULL) + copy_crop<<>>(y_hat, result.y_hat, u, v, su, sv, R[0], R[1]); + if(z_hat != NULL) + copy_crop<<>>(z_hat, result.z_hat, u, v, su, sv, R[0], R[1]); + + return result; +} diff --git a/fieldslice.h b/fieldslice.h new file mode 100644 index 0000000..a8c449c --- /dev/null +++ b/fieldslice.h @@ -0,0 +1,58 @@ +#ifndef FIELDSLICE_H +#define FIELDSLICE_H + +#include "dataTypes.h" + +#include +#include +#include "rts/rtsQuad.h" + +#include "scalarslice.h" + + +struct fieldslice +{ + //images representing the slice for each world-space polarization direction + bsComplex* x_hat; + bsComplex* y_hat; + bsComplex* z_hat; + + + + //slice resolution + unsigned int R[2]; + + //if the slice is a scalar + bool scalarField; + + fieldslice(unsigned int x_res, unsigned int y_res); + + fieldslice(); + + ~fieldslice(); + + //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); + + scalarslice Mag(); + scalarslice Real(); + scalarslice Imag(); + scalarslice Intensity(); + void IntegrateAndResample(scalarslice* detector, unsigned int supersample); + + void ScaleField(ptype v); + + //convert the field slice to the angular spectrum + void toAngularSpectrum(); + void fromAngularSpectrum(); + + //crop a region from the field + fieldslice crop(int u, int v, int su, int sv); + + void init_gpu(); + void kill_gpu(); + void clear_gpu(); + + std::string toStr(); +}; + +#endif diff --git a/fileout.cu b/fileout.cu new file mode 100644 index 0000000..9dc7bcd --- /dev/null +++ b/fileout.cu @@ -0,0 +1,202 @@ +#include "fileout.h" +//#include "scalarfield.h" + + +/*void fileoutStruct::saveMag(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + int Rx = U->R[0]; + int Ry = U->R[1]; + + //allocate space for a scalar field on the GPU + ptype* gpuScalar; + int memsize = sizeof(ptype) * Rx * Ry; + HANDLE_ERROR(cudaMalloc((void**) &gpuScalar, memsize)); + HANDLE_ERROR(cudaMemset(gpuScalar, 0, memsize)); + U->Mag(gpuScalar); + + + rts::colormap::gpu2image(gpuScalar, filename, Rx, Ry, 0, colorMax, cmap); + + HANDLE_ERROR(cudaFree(gpuScalar)); +} + +void fileoutStruct::saveReal_scalar(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + //returns the real component + scalarslice sf = U->Real(); + sf.toImage(filename, false, cmap); + +} + +void fileoutStruct::saveImag_scalar(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + //returns the imaginary component of a field (assumed scalar) + scalarslice sf = U->Imag(); + sf.toImage(filename, false, cmap); +} + +void fileoutStruct::saveIntensity(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + //get the intensity of the field + scalarslice sf = U->Intensity(); + sf.toImage(filename, true, cmap); +} + +void fileoutStruct::saveAngularSpectrum(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + ptype* gpuScalar; + int memsize = sizeof(ptype) * U->R[0] * U->R[1]; + HANDLE_ERROR(cudaMalloc((void**) &gpuScalar, memsize)); + HANDLE_ERROR(cudaMemset(gpuScalar, 0, memsize)); + + //convert the field slice to its angular spectrum + U->toAngularSpectrum(); + + //convert the angular spectrum to a scalar field + U->Mag(gpuScalar); + + //save the color image + rts::colormap::gpu2image(gpuScalar, filename, U->R[0], U->R[1], 0, colorMax, cmap); + + HANDLE_ERROR(cudaFree(gpuScalar)); + +}*/ + +void fileoutStruct::saveNearField(nearfieldStruct* nf) +{ + if(nearFile == "") return; + + if(field == fieldReal) + { + scalarslice S = nf->U.Real(); + S.toImage(nearFile, false, colormap); + } + if(field == fieldImag) + { + scalarslice S = nf->U.Imag(); + S.toImage(nearFile, false, colormap); + } + if(field == fieldMag) + { + scalarslice S = nf->U.Mag(); + S.toImage(nearFile, true, colormap); + } +} + +void fileoutStruct::saveFarField(microscopeStruct* scope) +{ + if(farFile == "") return; + + if(field == fieldReal) + { + scalarslice S = scope->Ud.Real(); + S.toImage(farFile, false, colormap); + } + if(field == fieldImag) + { + scalarslice S = scope->Ud.Imag(); + S.toImage(farFile, false, colormap); + } + if(field == fieldMag) + { + scalarslice S = scope->Ud.Mag(); + S.toImage(farFile, true, colormap); + } + +} + +void fileoutStruct::saveDetector(microscopeStruct* scope) +{ + //intensity + if(intFile != "") + { + scalarslice I = scope->getIntensity(); + + if(is_binary(intFile)) + I.toEnvi(intFile, scope->nf.lambda); + else + I.toImage(intFile); + } + //absorbance + if(absFile != "") + { + scalarslice I = scope->getAbsorbance(); + + if(is_binary(absFile)) + I.toEnvi(absFile, scope->nf.lambda); + else + I.toImage(absFile); + } + //transmittance + if(transFile != "") + { + scalarslice I = scope->getTransmittance(); + + if(is_binary(transFile)) + I.toEnvi(transFile, scope->nf.lambda); + else + I.toImage(transFile); + } + +} + +bool fileoutStruct::is_binary(std::string filename) +{ + //this function guesses if a file name is binary or a standard image + // do this by just testing extensions + + //get the extension + size_t i = filename.find_last_of('.'); + + //if there is no extension, return true + if( i == std::string::npos ) + return true; + + //otherwise grab the extension + std::string ext = filename.substr(i+1); + if(ext == "bmp" || + ext == "jpg" || + ext == "tif" || + ext == "gif" || + ext == "png") + return false; + else + return true; +} + + + +void fileoutStruct::Save(microscopeStruct* scope) +{ + //save images of the fields in the microscope + + //if the user specifies an extended source + if(scope->focalPoints.size() > 1) + { + //simulate the extended source and output the detector image + scope->SimulateExtendedSource(); + + } + else + { + //run the near-field simulation + scope->SimulateScattering(); + + //output the near field image + saveNearField(&scope->nf); + + //run the far-field simulation + scope->SimulateImaging(); + + saveFarField(scope); + + } + + //save the detector images + saveDetector(scope); + + +} + + + diff --git a/fileout.h b/fileout.h new file mode 100644 index 0000000..e3f8887 --- /dev/null +++ b/fileout.h @@ -0,0 +1,48 @@ +#ifndef FILE_OUTPUT_H +#define FILE_OUTPUT_H + +#include +//#include "defaults.h" +#include "dataTypes.h" + +#include "colormap.h" +#include "fieldslice.h" +#include "nearfield.h" +#include "microscope.h" +#include "rts/cuda_handle_error.h" + +struct fileoutStruct{ + + //output file names + std::string nearFile; //near field filename + std::string farFile; //far field filename + std::string intFile; //detector intensity filename + std::string absFile; //detector absorbance filename + std::string transFile; //detector transmission filename + + //output type + enum field_type {fieldMag, fieldIntensity, fieldAbsorbance, fieldPolar, fieldImag, fieldReal, fieldAngularSpectrum}; + enum image_source {imageNearField, imageObjective, imageDetector, imageExtendedSource}; + + field_type field; + + //image_source source; + + //color map info + rts::colormap::colormapType colormap; + ptype colorMax; + + + void Save(microscopeStruct* scope); + void Simulate(microscopeStruct* scope); + + private: + bool is_binary(std::string filename); + void saveNearField(nearfieldStruct* nf); + void saveFarField(microscopeStruct* scope); + void saveDetector(microscopeStruct* scope); + + +}; + +#endif diff --git a/gamma.cpp b/gamma.cpp new file mode 100644 index 0000000..150eb9c --- /dev/null +++ b/gamma.cpp @@ -0,0 +1,82 @@ +// gamma.cpp -- computation of gamma function. +// Algorithms and coefficient values from "Computation of Special +// Functions", Zhang and Jin, John Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +// Returns gamma function of argument 'x'. +// +// NOTE: Returns 1e308 if argument is a negative integer or 0, +// or if argument exceeds 171. +// +#define _USE_MATH_DEFINES +#include +double gamma(double x) +{ + int i,k,m; + double ga,gr,r,z; + + static double g[] = { + 1.0, + 0.5772156649015329, + -0.6558780715202538, + -0.420026350340952e-1, + 0.1665386113822915, + -0.421977345555443e-1, + -0.9621971527877e-2, + 0.7218943246663e-2, + -0.11651675918591e-2, + -0.2152416741149e-3, + 0.1280502823882e-3, + -0.201348547807e-4, + -0.12504934821e-5, + 0.1133027232e-5, + -0.2056338417e-6, + 0.6116095e-8, + 0.50020075e-8, + -0.11812746e-8, + 0.1043427e-9, + 0.77823e-11, + -0.36968e-11, + 0.51e-12, + -0.206e-13, + -0.54e-14, + 0.14e-14}; + + if (x > 171.0) return 1e308; // This value is an overflow flag. + if (x == (int)x) { + if (x > 0.0) { + ga = 1.0; // use factorial + for (i=2;i 1.0) { + z = fabs(x); + m = (int)z; + r = 1.0; + for (k=1;k<=m;k++) { + r *= (z-k); + } + z -= m; + } + else + z = x; + gr = g[24]; + for (k=23;k>=0;k--) { + gr = gr*z+g[k]; + } + ga = 1.0/(gr*z); + if (fabs(x) > 1.0) { + ga *= r; + if (x < 0.0) { + ga = -M_PI/(x*ga*sin(M_PI*x)); + } + } + } + return ga; +} diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..dfdba62 --- /dev/null +++ b/main.cpp @@ -0,0 +1,65 @@ +#include "sphere.h" +#include "rts/material.h" +#include +//#include "rts/complex.h" + +#include "nearfield.h" +//nearfieldStruct* NF; + +#include "microscope.h" +microscopeStruct* SCOPE; + +#include "fieldslice.h" + +#include "fileout.h" +#include "options.h" +#include "montecarlo.h" +#include "rts/rtsPoint.h" +#include "rts/sbessel.h" +#include "rts/rtsMatrix.h" +#include "rts/rtsQuaternion.h" + +#include "rtsEnvi.h" + +#include "warnings.h" + +fileoutStruct gFileOut; +using namespace std; + +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); + +int main(int argc, char *argv[]) +{ + //test Envi loading and saving + //EnviFile envi("testenvi", "w"); + + //float* data = (float*)malloc(sizeof(float) * 100 * 100); + //envi.addBand(data, 100, 100, 100); + + //envi.close(); + + //return 0; + + SCOPE = new microscopeStruct(); + + cout<nf.Uf.R[0]<init(); + + OutputOptions(); + + gFileOut.Save(SCOPE); + + //NF->destroy(); + SCOPE->destroy(); + + + + +} diff --git a/microscope.cu b/microscope.cu new file mode 100644 index 0000000..9154a5e --- /dev/null +++ b/microscope.cu @@ -0,0 +1,303 @@ +#include "microscope.h" + +#include "rts/cuda_handle_error.h" +#include "rts/rtsProgressBar.h" +#include "rts/cuda_timer.h" +#include "dataTypes.h" +#include "colormap.h" + +#include + +__global__ void bandpass(bsComplex* U, int uR, int vR, ptype du, ptype dv, ptype NAin, ptype NAout, ptype lambda) +{ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + ptype u, v; + + if(iu <= uR / 2) + u = (ptype)iu * du; + else + u = -(ptype)(uR - 1 - iu) * du; + + if(iv <= vR / 2) + v = (ptype)iv * dv; + else + v = -(ptype)(vR - 1 - iv) * dv; + + ptype fmag = sqrt(u*u + v*v); + + if(fmag < NAin / lambda || fmag > NAout / lambda) + U[i] = 0; + //U[i] = U[i]; +} + +microscopeStruct::microscopeStruct() +{ + scalarSim = true; + D = NULL; + Di = NULL; +} + +void microscopeStruct::init() +{ + //Ud.scalarField = scalarSim; + //Ufd.scalarField = scalarSim; + + //Ud.init_gpu(); + //Ufd.init_gpu(); + + //initialize the near field + nf.init(); + + //allocate space for the detector + D = new scalarslice(Ud.R[0] / ss, Ud.R[1] / ss); + Di = new scalarslice(Ud.R[0] / ss, Ud.R[1] / ss); + + //clear the detector + clearDetector(); + +} + +void microscopeStruct::destroy() +{ + delete D; + D = NULL; + + delete Di; + Di = NULL; + + Ud.kill_gpu(); + Ufd.kill_gpu(); + + //destroy the near field + nf.destroy(); + +} + +void microscopeStruct::applyBandpass() +{ + //This function applies the objective bandpass to the near field + //The near field structure stores the results, in order to save memory + + //first convert the near field to an angular spectrum (FFT) + nf.U.toAngularSpectrum(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((nf.U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (nf.U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //compute the step size in the frequency domain + ptype du = 1.0 / (nf.pos.X.len()); + ptype dv = 1.0 / (nf.pos.Y.len()); + + //apply the objective band-pass filter + bandpass<<>>(nf.U.x_hat, nf.U.R[0], nf.U.R[1], du, dv, objective[0], objective[1], nf.lambda); + + //convert the near field image back to the spatial domain + // (this is the field at the detector) + nf.U.fromAngularSpectrum(); +} + +void microscopeStruct::getFarField() +{ + //Compute the Far Field image of the focal plane + + //clear the memory from previous detector fields + Ud.kill_gpu(); + Ufd.kill_gpu(); + + //first crop the filtered near-field image of the source and scattered fields + Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); + Ufd = nf.Uf.crop(padding * Ufd.R[0], padding * Ufd.R[1], Ufd.R[0], Ufd.R[1]); + +} + +void microscopeStruct::integrateDetector() +{ + Ud.IntegrateAndResample(D, ss); + Ufd.IntegrateAndResample(Di, ss); +} + +void microscopeStruct::clearDetector() +{ + //zero-out the detector + D->clear(); + Di->clear(); +} + +//flag for a vector simulation +void microscopeStruct::setPos(bsPoint pMin, bsPoint pMax, bsVector normal) +{ + pos = rts::rtsQuad(pMin, pMax, normal); +} + +void microscopeStruct::setRes(int x_res, int y_res, int pad, int supersampling) +{ + padding = pad; + ss = supersampling; + + Ufd.R[0] = Ud.R[0] = x_res * ss; + Ufd.R[1] = Ud.R[1] = y_res * ss; +} + +void microscopeStruct::setNearfield() +{ + //sets the values for the near field in order to create the specified detector image + + //compute the size of the near-field slice necessary to create the detector image + nf.pos = pos * (padding * 2 + 1); + + //compute the resolution of the near-field slice necessary to create the detector image + nf.setRes(Ud.R[0] * (padding * 2 + 1), Ud.R[1] * (padding * 2 + 1)); + +} + +__global__ void calc_absorbance(ptype* A, ptype* D, ptype* Di, int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + A[i] = -log10(D[i] / Di[i]); + +} + +scalarslice microscopeStruct::getAbsorbance() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* A = new scalarslice(D->R[0], D->R[1]); + + //compute the total number of values in the slice + unsigned int N = D->R[0] * D->R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + calc_absorbance<<>>(A->S, D->S, Di->S, N); + + return *A; +} + +__global__ void calc_transmittance(ptype* A, ptype* D, ptype* Di, int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + A[i] = D[i] / Di[i]; + +} + +scalarslice microscopeStruct::getTransmittance() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* T = new scalarslice(D->R[0], D->R[1]); + + //compute the total number of values in the slice + unsigned int N = D->R[0] * D->R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + calc_transmittance<<>>(T->S, D->S, Di->S, N); + + return *T; +} + +scalarslice microscopeStruct::getIntensity() +{ + //create a scalar slice at the same resolution as the field + scalarslice* I = new scalarslice(D->R[0], D->R[1]); + + HANDLE_ERROR(cudaMemcpy(I->S, D->S, sizeof(ptype) * D->R[0] * D->R[1], cudaMemcpyDeviceToDevice)); + + return *I; + +} + +void microscopeStruct::SimulateScattering() +{ + nf.Simulate(); +} + +void microscopeStruct::SimulateImaging() +{ + applyBandpass(); + getFarField(); + integrateDetector(); +} + +void microscopeStruct::Simulate() +{ + SimulateScattering(); + SimulateImaging(); +} + +void microscopeStruct::SimulateExtendedSource() +{ + + clearDetector(); + + //for each source in the source list + int npts = focalPoints.size(); + float t=0; + for(int i = 0; i +using namespace std; + +struct sourcePoint +{ + bsPoint f; + ptype A; +}; + +struct microscopeStruct +{ + int padding; + + int ss; + + //field slice at the detector + fieldslice Ufd; + fieldslice Ud; + bool scalarSim; + + //detector image + scalarslice* D; //sample + scalarslice* Di; //incident field image + + ptype objective[2]; + + //image position and orientation in world space + rts::rtsQuad pos; + + vector focalPoints; + + microscopeStruct(); + + nearfieldStruct nf; + + void init(); + void destroy(); + + //functions for dealing with extended sources + void SimulateExtendedSource(); + void LoadExtendedSource(std::string filename); + + void SimulateScattering(); + void SimulateImaging(); + void Simulate(); + void setNearfield(); + void setRes(int x_res, int y_res, int pad, int supersampling); + void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); + + void applyBandpass(); + void getFarField(); + void integrateDetector(); + void clearDetector(); + + //compute detector measurements + scalarslice getAbsorbance(); + scalarslice getTransmittance(); + scalarslice getIntensity(); + + + +}; + +#endif diff --git a/montecarlo.cpp b/montecarlo.cpp new file mode 100644 index 0000000..7b9c09f --- /dev/null +++ b/montecarlo.cpp @@ -0,0 +1,70 @@ +#include "montecarlo.h" +#include "rts/rtsQuaternion.h" +#include "rts/rtsMatrix.h" +#include +#include +using namespace std; + +void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) +{ + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling + of a sphere and projecting these samples onto an inscribed sphere. + + samples = rtsPointer to sample vectors specified as normalized cartesian coordinates + N = number of samples + kSph = incident light direction in spherical coordinates + NAin = internal obscuration NA + NAout = outer cassegrain NA + */ + + //get the axis of rotation for transforming (0, 0, 1) to k + //k = -k; + ptype cos_angle = k.dot(bsVector(0, 0, 1)); + rts::rtsMatrix rotation; + if(cos_angle != 1.0) + { + bsVector axis = bsVector(0, 0, 1).cross(k).norm(); + + ptype angle = acos(cos_angle); + rts::rtsQuaternion quat; + quat.CreateRotation(angle, axis); + rotation = quat.toMatrix(); + } + + //find the phi values associated with the cassegrain ring + ptype inPhi = asin(NAin); + ptype outPhi = asin(NAout); + + //cout<<"inPhi: "<(pMin, pMax, normal); +} + +void nearfieldStruct::setRes(int x_res, int y_res) +{ + U.R[0] = Uf.R[0] = x_res; + U.R[1] = Uf.R[1] = y_res; +} + +std::string nearfieldStruct::toStr() +{ + std::stringstream ss; + + ss<<"------Field Parameters-------"< n = mVector[imat](lambda); + //std::cout<<"Sphere refractive index: "< + +#define EPSILON_FLOAT 0.000001 + +//This structure stores values relevant to creating the near field +struct nearfieldStruct +{ + //incident wavelength + ptype lambda; + + //condenser numerical aperature (internal and external) + ptype condenser[2]; + + //amplitude of the incident field + ptype A; + + //position of the focus in 3D space + bsVector k; //cartesian coordinates, normalized + bsPoint focus; + + //slice position and orientation in world space + rts::rtsQuad pos; + + //slices for the focused field + fieldslice Uf; + // and total field: Uf + sum(Us) + fieldslice U; + + //incident field order + int m; + + //flag for a vector simulation + bool scalarSim; + + //flag for a plane wave + bool planeWave; + + + + //---------Scatterers------------ + + //angular resolution of scatterers + // number of divisions in [0 pi] + unsigned int angRes; + + //list of materials + std::vector< rts::material > mVector; + + //list of scatterers + std::vector sVector; + + nearfieldStruct(); + + void init(); + void destroy(); + + void Simulate(); + + void calcSpheres(); + + //plane waves for Monte-Carlo sampling + std::vector inWaves; + int nWaves; + void calcWaves(); + + std::string toStr(); + + void setRes(int x_res, int y_res); + + void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); + + //this function re-computes the focused field + void scalarUf(); + + //compute the field scattered by all of the materials + void scalarUs(); + + //add the incident field to the sum of scattered fields + void sumUf(); + + + + + +}; + + + +#endif diff --git a/nfScalarUf.cu b/nfScalarUf.cu new file mode 100644 index 0000000..ec04ed2 --- /dev/null +++ b/nfScalarUf.cu @@ -0,0 +1,199 @@ +#include "nearfield.h" +#include "rts/sbessel.h" +#include "rts/legendre.h" +#include +#include "rts/cuda_handle_error.h" +#include "rts/cuda_timer.h" + + +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) +{ + /*Compute the scalar focused field using Debye focusing + k = direction of focused light, where |k| = 2*pi/lambda + P = rect struct describing the field slice + rX, rY = resolution of the field slice + cNAin = inner NA of the condenser + cNAout = outer NA of the condenser + */ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / uR; + ptype v = (ptype)iv / vR; + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - f; + //ptype d = r.len(); + + ptype k_dot_r = kmag * k.dot(r); + bsComplex d(0, k_dot_r); + + Uf[i] = exp(d) * A; + +} + +__global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) +{ + /*Compute the scalar focused field using Debye focusing + k = direction of focused light, where |k| = 2*pi/lambda + P = rect struct describing the field slice + rX, rY = resolution of the field slice + cNAin = inner NA of the condenser + cNAout = outer NA of the condenser + */ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / (uR); + ptype v = (ptype)iv / (vR); + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - f; + ptype d = r.len(); + if(d < EPSILON_FLOAT) + { + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); + return; + } + + //get info for the light direction and frequency + //k = k.norm(); + r = r.norm(); + + //compute the imaginary factor i^l + bsComplex im = bsComplex(0, 1); + bsComplex il = bsComplex(1, 0); + + //Bessel and Legendre functions are computed dynamically to save memory + //initialize the Bessel and Legendre functions + ptype j[2]; + ptype kd = kmag * d; + rts::init_sbesselj(kd, j); + + ptype P[2]; + //get the angle between k and r (light direction and position vector) + ptype cosTheta; + cosTheta = k.dot(r); + + //deal with the degenerate case where r == 0 + //if(isnan(cosTheta)) + // cosTheta = 0; + rts::init_legendre(cosTheta, P[0], P[1]); + + //initialize legendre functions for the cassegrain angles + ptype Palpha[3]; + //ptype cosAlpha = cos(asin(cNAin)); + rts::init_legendre(cosAlpha, Palpha[0], Palpha[1]); + Palpha[2] = 1; + + ptype Pbeta[3]; + //ptype cosBeta = cos(asin(cNAout)); + rts::init_legendre(cosBeta, Pbeta[0], Pbeta[1]); + Pbeta[2] = 1; + + //for each order l + bsComplex sumUf(0.0, 0.0); + ptype jl = 0.0; + ptype Pl; + for(int l = 0; l<=nl; l++) + { + + if(l==0) + { + + jl = j[0]; + Pl = P[0]; + } + else if(l==1) + { + jl = j[1]; + Pl = P[1]; + + //adjust the cassegrain Legendre function + Palpha[2] = Palpha[0]; + rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); + Pbeta[2] = Pbeta[0]; + rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); + } + else + { + rts::shift_sbesselj(l, kd, j);//, j_conv); + rts::shift_legendre(l, cosTheta, P[0], P[1]); + + jl = j[1]; + Pl = P[1]; + + //adjust the cassegrain outer Legendre function + Palpha[2] = Palpha[0]; + rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); + Pbeta[2] = Pbeta[0]; + rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); + } + + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); + + il *= im; + } + + Uf[i] = sumUf * 2 * PI * A; + +} + +void nearfieldStruct::scalarUf() +{ + //Compute the incident field via a scalar simulation + //This method uses Debye focusing to approximate the field analytically + + //time the calculation of the focused field + //gpuStartTimer(); + + //set the field slice to a scalar field + //Uf.scalarField = true; + + //initialize the GPU arrays + //Uf.init_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //if we are computing a plane wave, call the gpuScalarUfp function + if(planeWave) + { + gpuScalarUfp<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); + } + //otherwise compute the condenser info and create a focused field + else + { + //pre-compute the cosine of the obscuration and objective angles + ptype cosAlpha = cos(asin(condenser[0])); + ptype cosBeta = cos(asin(condenser[1])); + //compute the scalar Uf field (this will be in the x_hat channel of Uf) + gpuScalarUf<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); + } + + //float t = gpuStopTimer(); + //std::cout<<"Scalar Uf Time: "< +#include "rts/cuda_handle_error.h" +#include "rts/cuda_timer.h" + +__device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B) +{ + //initialize the spherical Bessel functions + ptype j[2]; + rts::init_sbesselj(kd, j); + ptype y[2]; + rts::init_sbessely(kd, y); + + //initialize the Legendre polynomial + ptype P[2]; + rts::init_legendre(cos_theta, P[0], P[1]); + + //initialize the spherical Hankel function + bsComplex h((ptype)0, (ptype)0); + + //initialize the result + bsComplex Us((ptype)0, (ptype)0); + + //for each order up to Nl + for(int l=0; l<=Nl; l++) + { + if(l == 0) + { + h.r = j[0]; + h.i = y[0]; + Us += B[0] * h * P[0]; + } + else + { + //shift the bessel functions and legendre polynomials + if(l > 1) + { + rts::shift_legendre(l, cos_theta, P[0], P[1]); + rts::shift_sbesselj(l, kd, j); + rts::shift_sbessely(l, kd, y); + } + + h.r = j[1]; + h.i = y[1]; + Us += B[l] * h * P[1]; + + + } + } + return Us; +} + +__device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A) +{ + //calculate the internal field of a sphere + + bsComplex Ui((ptype)0, (ptype)0); + + //deal with rtsPoints near zero + if(real(knd) < EPSILON_FLOAT) + { + //for(int l=0; l(knd, j); + + //initialize the Legendre polynomial + ptype P[2]; + rts::init_legendre(cos_theta, P[0], P[1]); + + //for each order up to Nl + for(int l=0; l<=Nl; l++) + { + if(l == 0) + { + Ui += A[0] * j[0] * P[0]; + } + else + { + //shift the bessel functions and legendre polynomials + if(l > 1) + { + rts::shift_legendre(l, cos_theta, P[0], P[1]); + rts::shift_sbesselj(l, knd, j); + } + + Ui += A[l] * j[1] * P[1]; + + + } + } + return Ui; +} + +__global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR) +{ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / (uR); + ptype v = (ptype)iv / (vR); + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - ps; + ptype d = r.len(); + + bsComplex sumUs(0, 0); + //for each plane wave in the wave list + for(int iw = 0; iw < nk; iw++) + { + //normalize the direction vectors and find their inner product + r = r.norm(); + ptype cos_theta = k[iw].dot(r); + + //compute the phase factor for spheres that are not at the origin + bsVector c = ps - f; + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); + + //compute the internal field if we are inside a sphere + if(d <= a) + { + bsComplex knd = kmag * d * n; + sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha); + } + //otherwise compute the scattered field + else + { + //compute the argument for the spherical Hankel function + ptype kd = kmag * d; + sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta); + } + + } + + Us[i] += sumUs; + + +} + +void nearfieldStruct::scalarUs() +{ + //get the number of spheres + int nSpheres = sVector.size(); + + //if there are no spheres, nothing to do here + if(nSpheres == 0) + return; + + //time the calculation of the focused field + //gpuStartTimer(); + + //clear the scattered field + U.clear_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //for each sphere + int Nl; + for(int s = 0; s>>(U.x_hat, + gpuk, + 1, + 2 * PI / lambda, + focus, + sVector[s].p, + sVector[s].a, + sVector[s].n, + gpuB, + gpuA, + Nl, + A, + pos, + U.R[0], + U.R[1]); + HANDLE_ERROR(cudaFree(gpuk)); + } + //otherwise copy all of the monte-carlo samples to the GPU and compute + else + { + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() )); + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice)); + + //compute the amplitude that makes it through the condenser + ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); + + gpuScalarUsp<<>>(U.x_hat, + gpuk, + inWaves.size(), + 2 * PI / lambda, + focus, + sVector[s].p, + sVector[s].a, + sVector[s].n, + gpuB, + gpuA, + Nl, + subA, + pos, + U.R[0], + U.R[1]); + HANDLE_ERROR(cudaFree(gpuk)); + + + } + + //free memory for scattering coefficients + HANDLE_ERROR(cudaFree(gpuA)); + HANDLE_ERROR(cudaFree(gpuB)); + } + + + + //float t = gpuStopTimer(); + //std::cout<<"Scalar Us Time: "< +#include "rts/cuda_handle_error.h" +#include "rts/cuda_timer.h" + +__global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, + bsComplex* Ux, bsComplex* Uy, bsComplex* Uz, + bsPoint* ps, ptype* as, int ns, bsRect ABCD, int uR, int vR) +{ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / uR; + ptype v = (ptype)iv / vR; + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r; + ptype d; + + //if we are inside of a sphere, return + for(int is=0; is>>(Uf.x_hat, + Uf.y_hat, + Uf.z_hat, + U.x_hat, + U.y_hat, + U.z_hat, + gpu_p, + gpu_a, + nSpheres, + pos, + U.R[0], + U.R[1]); + + + + //free sphere lists + HANDLE_ERROR(cudaFree(gpu_p)); + HANDLE_ERROR(cudaFree(gpu_a)); + + //float t = gpuStopTimer(); + //std::cout<<"Add Us Time: "< +#include +#include +#include +using namespace std; + +#include +namespace po = boost::program_options; + +static void loadSpheres(string sphereList) +{ + /*This function loads a list of sphere given in the string sphereList + The format is: + x y z a m + where + x, y, z = sphere position (required) + a = sphere radius (required) + m = material ID (optional) */ + + std::stringstream ss(sphereList); + + while(!ss.eof()) + { + //create a new sphere + sphere newS; + + //get the sphere data + ss>>newS.p[0]; + ss>>newS.p[1]; + ss>>newS.p[2]; + ss>>newS.a; + + if(ss.peek() != '\n') + ss>>newS.iMaterial; + + //add the new sphere to the sphere vector + SCOPE->nf.sVector.push_back(newS); + + //ignore the rest of the line + ss.ignore(std::numeric_limits::max(), '\n'); + + //check out the next element (this should set the EOF error flag) + ss.peek(); + } + + + +} + +static void loadSpheres(po::variables_map vm) +{ + //if a files are specified + if(vm.count("sphere-file")) + { + cout<<"Sphere files detected."< filenames = vm["sphere-file"].as< vector >(); + //load each file + for(int iS=0; iS(ifs)), std::istreambuf_iterator()); + + //load the list of spheres from a string + loadSpheres(instr); + } + } + + //load the sphere from the command line + if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s")) + { + //create a new sphere + sphere newS; + + //set defaults + if(vm.count("sx")) + newS.p[0] = vm["sx"].as(); + else + newS.p[0] = DEFAULT_SPHERE_X; + + + if(vm.count("sy")) + newS.p[1] = vm["sy"].as(); + else + newS.p[1] = DEFAULT_SPHERE_Y; + + if(vm.count("sz")) + newS.p[2] = vm["sz"].as(); + else + newS.p[2] = DEFAULT_SPHERE_Z; + + if(vm.count("radius")) + newS.a = vm["radius"].as(); + else + newS.a = DEFAULT_SPHERE_A; + + //add the sphere to the sphere vector + SCOPE->nf.sVector.push_back(newS); + + } +} + +static void loadMaterials(po::variables_map vm) +{ + //if materials are specified at the command line + if(vm.count("materials")) + { + vector matVec = vm["materials"].as< vector >(); + if(matVec.size() %2 != 0) + { + cout<<"BIMSim Error: materials must be specified in n, k pairs"< newM(vm["lambda"].as(), matVec[i], matVec[i+1]); + SCOPE->nf.mVector.push_back(newM); + } + } + else + { + //add the command line material as the default (material 0) + rts::material newM(vm["lambda"].as(), vm["n"].as(), vm["k"].as()); + SCOPE->nf.mVector.push_back(newM); + } + + //if file names are specified, load the materials + if(vm.count("material-file")) + { + vector filenames = vm["material-file"].as< vector >(); + for(int i=0; i(ifs)), std::istreambuf_iterator()); + + //load the list of spheres from a string + rts::material newM; + newM.fromStr(instr, ""); + SCOPE->nf.mVector.push_back(newM); + } + } + +} + +static void loadNearfieldParams(po::variables_map vm) +{ + //test to see if we are simulating a plane wave + bool planeWave = DEFAULT_PLANEWAVE; + if(vm.count("plane-wave")) + planeWave = !planeWave; + SCOPE->nf.planeWave = planeWave; + + //get the wavelength + SCOPE->nf.lambda = vm["lambda"].as(); + + //get the incident field amplitude + SCOPE->nf.A = vm["amplitude"].as(); + + //get the condenser parameters + SCOPE->nf.condenser[0] = vm["condenser-min"].as(); + SCOPE->nf.condenser[1] = vm["condenser-max"].as(); + + + //get the focal rtsPoint position + SCOPE->nf.focus[0] = vm["fx"].as(); + SCOPE->nf.focus[1] = vm["fy"].as(); + SCOPE->nf.focus[2] = vm["fz"].as(); + + //get the incident light direction (k-vector) + bsVector spherical; + spherical[0] = 1.0; + spherical[1] = vm["theta"].as(); + spherical[2] = vm["phi"].as(); + SCOPE->nf.k = spherical.sph2cart(); + + + //incident field order + SCOPE->nf.m = vm["field-order"].as(); + + //number of Monte-Carlo samples + SCOPE->nf.nWaves = vm["samples"].as(); + + + +} + +static void loadSliceParams(po::variables_map vm) +{ + //parameters for the sample plane + + + //set the default values for the slice position and orientation + bsPoint pMin(vm["plane-min-x"].as(), vm["plane-min-y"].as(), vm["plane-min-z"].as()); + bsPoint pMax(vm["plane-max-x"].as(), vm["plane-max-y"].as(), vm["plane-max-z"].as()); + bsVector normal(vm["plane-norm-x"].as(), vm["plane-norm-y"].as(), vm["plane-norm-z"].as()); + SCOPE->setPos(pMin, pMax, normal); + + //resolution + SCOPE->setRes(vm["resolution"].as(), + vm["resolution"].as(), + vm["padding"].as(), + vm["supersample"].as()); + + + + + + SCOPE->setNearfield(); + + + +} + +static void loadMicroscopeParams(po::variables_map vm) +{ + //objective + SCOPE->objective[0] = vm["objective-min"].as(); + SCOPE->objective[1] = vm["objective-max"].as(); + + + + + +} + +static void loadOutputParams(po::variables_map vm) +{ + + //image parameters + //component of the field to be saved + std::string fieldStr; + fieldStr = vm["output-type"].as(); + + if(fieldStr == "magnitude") + gFileOut.field = fileoutStruct::fieldMag; + else if(fieldStr == "intensity") + gFileOut.field = fileoutStruct::fieldIntensity; + else if(fieldStr == "polarization") + gFileOut.field = fileoutStruct::fieldPolar; + else if(fieldStr == "imaginary") + gFileOut.field = fileoutStruct::fieldImag; + else if(fieldStr == "real") + gFileOut.field = fileoutStruct::fieldReal; + else if(fieldStr == "angular-spectrum") + gFileOut.field = fileoutStruct::fieldAngularSpectrum; + + + //image file names + gFileOut.intFile = vm["intensity"].as(); + gFileOut.absFile = vm["absorbance"].as(); + gFileOut.transFile = vm["transmittance"].as(); + gFileOut.nearFile = vm["near-field"].as(); + gFileOut.farFile = vm["far-field"].as(); + + //colormap + std::string cmapStr; + cmapStr = vm["colormap"].as(); + if(cmapStr == "brewer") + gFileOut.colormap = rts::colormap::cmBrewer; + else if(cmapStr == "gray") + gFileOut.colormap = rts::colormap::cmGrayscale; + else + cout<<"color-map value not recognized (using default): "<nf.toStr(); + +} + +static void SetOptions(po::options_description &desc) +{ + desc.add_options() + ("help,h", "prints this help") + ("plane-wave,P", "simulates an incident plane wave") + ("intensity,I", po::value()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") + ("absorbance,A", po::value()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") + ("transmittance,T", po::value()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") + ("far-field,F", po::value()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") + ("near-field,N", po::value()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") + ("extended-source,X", po::value()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)") + //("sx,x", po::value()->default_value(DEFAULT_SPHERE_X), "sphere coordinates") + //("sy,y", po::value()->default_value(DEFAULT_SPHERE_Y)) + //("sz,z", po::value()->default_value(DEFAULT_SPHERE_Z)) + ("sx,x", po::value(), "sphere coordinates") + ("sy,y", po::value()) + ("sz,z", po::value()) + ("radius,r", po::value()->default_value(DEFAULT_SPHERE_A), "sphere radius") + ("samples,s", po::value()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") + ("sphere-file,S", po::value< vector >()->multitoken(), "sphere file:\n [x y z radius material]") + ("amplitude,a", po::value()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") + ("n,n", po::value()->default_value(DEFAULT_N, "1.4"), "sphere phase speed") + ("k,k", po::value()->default_value(DEFAULT_K), "sphere absorption coefficient") + ("material-file,M", po::value< vector >()->multitoken(), "material file:\n [lambda n k]") + ("materials", po::value< vector >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)") + ("lambda,l", po::value()->default_value(DEFAULT_LAMBDA), "incident wavelength") + ("theta,t", po::value()->default_value(DEFAULT_K_THETA), "light direction (polar coords)") + ("phi,p", po::value()->default_value(DEFAULT_K_PHI)) + ("fx", po::value()->default_value(DEFAULT_FOCUS_X), "incident focal point") + ("fy", po::value()->default_value(DEFAULT_FOCUS_Y)) + ("fz", po::value()->default_value(DEFAULT_FOCUS_Z)) + ("condenser-max,C", po::value()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature") + ("condenser-min,c", po::value()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA") + ("objective-max,O", po::value()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature") + ("objective-min,o", po::value()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA") + ("field-order", po::value()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") + ("output-type,f", po::value()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") + ("resolution,R", po::value()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") + ("padding,d", po::value()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") + ("supersample", po::value()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") + ("colormap", po::value()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") + ("append", "append result to an existing file\n (binary files only)") + ("plane-min-x,u", po::value()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice") + ("plane-min-y,v", po::value()->default_value(DEFAULT_SLICE_MIN_Y)) + ("plane-min-z,w", po::value()->default_value(DEFAULT_SLICE_MIN_Z)) + ("plane-max-x,U", po::value()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice") + ("plane-max-y,V", po::value()->default_value(DEFAULT_SLICE_MAX_Y)) + ("plane-max-z,W", po::value()->default_value(DEFAULT_SLICE_MAX_Z)) + ("plane-norm-x", po::value()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal") + ("plane-norm-y", po::value()->default_value(DEFAULT_SLICE_NORM_Y)) + ("plane-norm-z", po::value()->default_value(DEFAULT_SLICE_NORM_Z)); +} + +static void LoadParameters(int argc, char *argv[]) +{ + //create an option description + po::options_description desc("Allowed options"); + + //fill it with options + SetOptions(desc); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //display help and exit + if(vm.count("help")) + { + cout<() != "") + { + //load the point sources + string filename = vm["extended-source"].as(); + SCOPE->LoadExtendedSource(filename); + + } + + + + + +} diff --git a/rtsEnvi.h b/rtsEnvi.h new file mode 100644 index 0000000..250526c --- /dev/null +++ b/rtsEnvi.h @@ -0,0 +1,610 @@ +#ifndef RTS_ENVI_H +#define RTS_ENVI_H + +#include +#include +#include +#include + +/* This is a class for reading and writing ENVI binary files. This class will be updated as needed. + +What this class CAN currently do: + *) Write band images to a BSQ file. + *) Append band images to a BSQ file. + +*/ + +//information from an ENVI header file +//A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html +struct EnviHeader +{ + std::string name; + + std::string description; + + unsigned int samples; //x-axis + unsigned int lines; //y-axis + unsigned int bands; //spectral axis + unsigned int header_offset; //header offset for binary file (in bytes) + std::string file_type; //should be "ENVI Standard" + unsigned int data_type; //data representation; common value is 4 (32-bit float) + + enum interleaveType {BIP, BIL, BSQ}; //bip = Z,X,Y; bil = X,Z,Y; bsq = X,Y,Z + interleaveType interleave; + + std::string sensor_type; //not really used + + enum endianType {endianLittle, endianBig}; + endianType byte_order; //little = least significant bit first (most systems) + + double x_start, y_start; //coordinates of the upper-left corner of the image + std::string wavelength_units; //stored wavelength units + std::string z_plot_titles[2]; + + double pixel_size[2]; //pixel size along X and Y + + std::vector band_names; //name for each band in the image + std::vector wavelength; //wavelength for each band + + EnviHeader() + { + name = ""; + + //specify default values for a new or empty ENVI file + samples = 0; + lines = 0; + bands = 0; + header_offset = 0; + data_type = 4; + interleave = BSQ; + byte_order = endianLittle; + x_start = y_start = 0; + pixel_size[0] = pixel_size[1] = 1; + + //strings + file_type = "ENVI Standard"; + sensor_type = "Unknown"; + wavelength_units = "Unknown"; + z_plot_titles[0] = z_plot_titles[1] = "Unknown"; + } + + std::string trim(std::string line) + { + //trims whitespace from the beginning and end of line + int start_i, end_i; + for(start_i=0; start_i < line.length(); start_i++) + if(line[start_i] != 32) + { + break; + } + + for(end_i = line.length()-1; end_i >= start_i; end_i--) + if(line[end_i] != ' ') + { + break; + } + + return line.substr(start_i, end_i - start_i+1); + } + + + std::string get_token(std::string line) + { + //returns a variable name; in this case we look for the '=' sign + size_t i = line.find_first_of('='); + + std::string result; + if(i != std::string::npos) + result = trim(line.substr(0, i-1)); + + return result; + } + + std::string get_data_str(std::string line) + { + size_t i = line.find_first_of('='); + + std::string result; + if(i != std::string::npos) + result = trim(line.substr(i+1)); + else + { + std::cout<<"ENVI Header error - data not found for token: "< get_string_seq(std::string token, std::string sequence) + { + //this function returns a sequence of comma-delimited strings + std::vector result; + + std::string entry; + size_t i; + do + { + i = sequence.find_first_of(','); + entry = sequence.substr(0, i); + sequence = sequence.substr(i+1); + result.push_back(trim(entry)); + }while(i != std::string::npos); + + return result; + } + + std::vector get_double_seq(std::string token, std::string sequence) + { + //this function returns a sequence of comma-delimited strings + std::vector result; + + std::string entry; + size_t i; + do + { + i = sequence.find_first_of(','); + entry = sequence.substr(0, i); + sequence = sequence.substr(i+1); + result.push_back(atof(entry.c_str())); + }while(i != std::string::npos); + + return result; + } + + void load(std::string filename) + { + name = filename; + + //open the header file + std::ifstream file(name.c_str()); + + if(!file) + { + std::cout<<"ENVI header file not found: "< pxsize = get_double_seq(token, string_sequence); + pixel_size[0] = pxsize[0]; + pixel_size[1] = pxsize[1]; + } + else if(token == "z plot titles") + { + std::string string_sequence = get_brace_str(token, line, file); + std::vector titles = get_string_seq(token, string_sequence); + z_plot_titles[0] = titles[0]; + z_plot_titles[1] = titles[1]; + } + + else if(token == "samples") + samples = atoi(get_data_str(line).c_str()); + else if(token == "lines") + lines = atoi(get_data_str(line).c_str()); + else if(token == "bands") + bands = atoi(get_data_str(line).c_str()); + else if(token == "header offset") + header_offset = atoi(get_data_str(line).c_str()); + else if(token == "file type") + file_type = get_data_str(line); + else if(token == "data type") + data_type = atoi(get_data_str(line).c_str()); + else if(token == "interleave") + { + std::string interleave_str = get_data_str(line); + if(interleave_str == "bip") + interleave = BIP; + else if(interleave_str == "bil") + interleave = BIL; + else if(interleave_str == "bsq") + interleave = BSQ; + } + else if(token == "sensor type") + sensor_type = get_data_str(line); + else if(token == "byte order") + byte_order = (endianType)atoi(get_data_str(line).c_str()); + else if(token == "x start") + x_start = atof(get_data_str(line).c_str()); + else if(token == "y start") + y_start = atof(get_data_str(line).c_str()); + else if(token == "wavelength units") + wavelength_units = get_data_str(line); + } + + //close the file + file.close(); + } + + void save(std::string filename) + { + //open a file + std::ofstream outfile(filename.c_str()); + + //write the ENVI type identifier + outfile<<"ENVI"< 0) + { + outfile<<"band names = {"<(S, filename, R[0], R[1], vmin, vmax, cmap); +} + +void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap) +{ + cublasStatus_t stat; + cublasHandle_t handle; + + //create a CUBLAS handle + stat = cublasCreate(&handle); + if(stat != CUBLAS_STATUS_SUCCESS) + { + std::cout<<"CUBLAS Error: initialization failed"< +#include + +using namespace rts; +using namespace std; + +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); + +int cbessjyva_sph(int v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); + +void sphere::calcCoeff(ptype lambda, rtsComplex ri) +{ + /* These calculations are done at high-precision on the CPU + since they are only required once for each wavelength. + + Input: + + lambda = wavelength of the incident field + n = complex refractive index of the sphere + */ + + //clear the previous coefficients + A.clear(); + B.clear(); + + //convert to an std complex value + complex nc(ri.real(), ri.imag()); + n = ri; + + //compute the magnitude of the k vector + double k = 2 * PI / lambda; + complex kna = nc * k * (double)a; + + //compute the arguments k*a and k*n*a + complex ka(k * a, 0.0); + + //allocate space for the Bessel functions of the first and second kind (and derivatives) + int bytes = sizeof(complex) * (Nl + 1); + complex* cjv_ka = (complex*)malloc(bytes); + complex* cyv_ka = (complex*)malloc(bytes); + complex* cjvp_ka = (complex*)malloc(bytes); + complex* cyvp_ka = (complex*)malloc(bytes); + complex* cjv_kna = (complex*)malloc(bytes); + complex* cyv_kna = (complex*)malloc(bytes); + complex* cjvp_kna = (complex*)malloc(bytes); + complex* cyvp_kna = (complex*)malloc(bytes); + + //allocate space for the spherical Hankel functions and derivative + complex* chv_ka = (complex*)malloc(bytes); + complex* chvp_ka = (complex*)malloc(bytes); + + //compute the bessel functions using the CPU-based algorithm + double vm; + cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); + cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); + + + //cout<<"Begin Sphere---------"< +#include +#include +#include +#include "fieldslice.h" +#include "dataTypes.h" + + + +struct sphere +{ + + //sphere position + bsPoint p; + + //sphere radius + ptype a; + + //sphere material index + int iMaterial; + + //rtsPointer to the scattered field produced by a plane wave + // this is a function of cos(theta) and |r| (distance from sphere center) + //fieldslice surface; + + //resolution of the scattered field + int thetaR, rR; + + //sphere order + int Nl; + + //refractive index for the current lambda + bsComplex n; + + //external scattering coefficients + std::vector B; + + //internal scattering coefficients + std::vector A; + + sphere(ptype x = 0.0f, ptype y = 0.0f, ptype z = 0.0f, ptype a = 0.0f, int m = 0, int ang = 128) + { + this->p = bsPoint(x, y, z); + this->a = a; + this->iMaterial = m; + + //surface = fieldslice(ang, ang/2); + } + + std::string toStr() + { + std::stringstream ss; + + ss< n); + + + + +}; + +#endif diff --git a/warnings.cpp b/warnings.cpp new file mode 100644 index 0000000..f9f2949 --- /dev/null +++ b/warnings.cpp @@ -0,0 +1,19 @@ +#include "warnings.h" + +void TestSimulation(nearfieldStruct* nf, microscopeStruct* scope, fileoutStruct* fout) +{ +/* //This function tests the simulation input and outputs any warnings or errors + + + //if the output is a detector image, the type should be intensity or absorbance + if(fout->source == fileoutStruct::imageDetector) + { + if(fout->field != fileoutStruct::fieldIntensity || fout->field != fileoutStruct::fieldAbsorbance) + { + cout<<"The detector only supports intensity and absorbance measurements. Defaulting to intensity."<field = fileoutStruct::fieldIntensity; + } + } + + */ +} diff --git a/warnings.h b/warnings.h new file mode 100644 index 0000000..026685b --- /dev/null +++ b/warnings.h @@ -0,0 +1,5 @@ +#include "nearfield.h" +#include "microscope.h" +#include "fileout.h" + +void TestSimulation(nearfieldStruct* nf, microscopeStruct* scope, fileoutStruct* fout); diff --git a/win32/QtCore4.dll b/win32/QtCore4.dll new file mode 100644 index 0000000..09f5f70 Binary files /dev/null and b/win32/QtCore4.dll differ diff --git a/win32/QtGui4.dll b/win32/QtGui4.dll new file mode 100644 index 0000000..d5f18c2 Binary files /dev/null and b/win32/QtGui4.dll differ diff --git a/win32/bimsim.exe b/win32/bimsim.exe new file mode 100644 index 0000000..78967fd Binary files /dev/null and b/win32/bimsim.exe differ diff --git a/win32/cublas32_50_35.dll b/win32/cublas32_50_35.dll new file mode 100644 index 0000000..d2556d5 Binary files /dev/null and b/win32/cublas32_50_35.dll differ diff --git a/win32/cudart32_50_35.dll b/win32/cudart32_50_35.dll new file mode 100644 index 0000000..deca212 Binary files /dev/null and b/win32/cudart32_50_35.dll differ diff --git a/win32/cufft32_50_35.dll b/win32/cufft32_50_35.dll new file mode 100644 index 0000000..da4b5ca Binary files /dev/null and b/win32/cufft32_50_35.dll differ diff --git a/win32/einstein.jpg b/win32/einstein.jpg new file mode 100644 index 0000000..97b073d Binary files /dev/null and b/win32/einstein.jpg differ diff --git a/win32/mc_spheres.txt b/win32/mc_spheres.txt new file mode 100644 index 0000000..de3f57d --- /dev/null +++ b/win32/mc_spheres.txt @@ -0,0 +1,3 @@ +-3 0 -1 1 0 +0 0 0 1 0 +3 0 1 1 0 \ No newline at end of file diff --git a/win32/spheres.txt b/win32/spheres.txt new file mode 100644 index 0000000..9c8418a --- /dev/null +++ b/win32/spheres.txt @@ -0,0 +1,4 @@ +-3 0 0 1 0 +0 3 0 1 1 +3 0 0 1 2 +0 -3 0 1 3 -- libgit2 0.21.4