From 3f56f1f9aabf59955e4227b2895129e1349a51e2 Mon Sep 17 00:00:00 2001 From: dmayerich Date: Tue, 10 Sep 2013 07:32:14 -0500 Subject: [PATCH] initial commit --- CMakeLists.txt | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FindCUDASDK.cmake | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FindGLEW.cmake | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ FindRTS.cmake | 14 ++++++++++++++ README.md | 21 +++++++++++++++++++++ bessel.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ bessik.cpp | 487 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ bessjy.cpp | 706 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cbessik.cpp | 454 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cbessjy.cpp | 739 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ colormap.h | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ dataTypes.h | 39 +++++++++++++++++++++++++++++++++++++++ defaults.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fieldslice.cpp | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fieldslice.cu | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fieldslice.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fileout.cu | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fileout.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ gamma.cpp | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.cpp | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ microscope.cu | 303 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ microscope.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ montecarlo.cpp | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ montecarlo.h | 2 ++ nearfield.cpp | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ nearfield.h | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ nfScalarUf.cu | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ nfScalarUs.cu | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ nfSumUf.cu | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ options.h | 398 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ rtsEnvi.h | 610 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scalarslice.cu | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scalarslice.h | 28 ++++++++++++++++++++++++++++ sphere.cpp | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sphere.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ warnings.cpp | 19 +++++++++++++++++++ warnings.h | 5 +++++ win32/QtCore4.dll | Bin 0 -> 2576384 bytes win32/QtGui4.dll | Bin 0 -> 8571392 bytes win32/bimsim.exe | Bin 0 -> 449024 bytes win32/cublas32_50_35.dll | Bin 0 -> 54850920 bytes win32/cudart32_50_35.dll | Bin 0 -> 472424 bytes win32/cufft32_50_35.dll | Bin 0 -> 24233320 bytes win32/einstein.jpg | Bin 0 -> 2564 bytes win32/mc_spheres.txt | 3 +++ win32/spheres.txt | 4 ++++ 46 files changed, 6531 insertions(+), 0 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 FindCUDASDK.cmake create mode 100644 FindGLEW.cmake create mode 100644 FindRTS.cmake create mode 100644 README.md create mode 100644 bessel.h create mode 100644 bessik.cpp create mode 100644 bessjy.cpp create mode 100644 cbessik.cpp create mode 100644 cbessjy.cpp create mode 100644 colormap.h create mode 100644 dataTypes.h create mode 100644 defaults.h create mode 100644 fieldslice.cpp create mode 100644 fieldslice.cu create mode 100644 fieldslice.h create mode 100644 fileout.cu create mode 100644 fileout.h create mode 100644 gamma.cpp create mode 100644 main.cpp create mode 100644 microscope.cu create mode 100644 microscope.h create mode 100644 montecarlo.cpp create mode 100644 montecarlo.h create mode 100644 nearfield.cpp create mode 100644 nearfield.h create mode 100644 nfScalarUf.cu create mode 100644 nfScalarUs.cu create mode 100644 nfSumUf.cu create mode 100644 options.h create mode 100644 rtsEnvi.h create mode 100644 scalarslice.cu create mode 100644 scalarslice.h create mode 100644 sphere.cpp create mode 100644 sphere.h create mode 100644 warnings.cpp create mode 100644 warnings.h create mode 100644 win32/QtCore4.dll create mode 100644 win32/QtGui4.dll create mode 100644 win32/bimsim.exe create mode 100644 win32/cublas32_50_35.dll create mode 100644 win32/cudart32_50_35.dll create mode 100644 win32/cufft32_50_35.dll create mode 100644 win32/einstein.jpg create mode 100644 win32/mc_spheres.txt create mode 100644 win32/spheres.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..7df99da --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,95 @@ +#Specify the version being used aswell as the language +cmake_minimum_required(VERSION 2.8) +#Name your project here +project(bimsim) + +#set the module directory +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") + +#find BOOST +set(Boost_USE_STATIC_LIBS ON) +set(Boost_USE_MULTITHREADED ON) +set(Boost_USE_STATIC_RUNTIME OFF) +find_package( Boost 1.49.0 COMPONENTS program_options ) + +#find the Qt4 +find_package(Qt4 REQUIRED) +include_directories(${QT_INCLUDE_DIRECTORY}) +include(${QT_USE_FILE}) + +#set up CUDA +find_package(CUDA) + +#find OpenGL +#find_package(OpenGL REQUIRED) + +#find GLUT +#set(GLUT_ROOT_PATH $ENV{GLUT_ROOT_PATH}) +#find_package(GLUT REQUIRED) + +#find GLEW +#find_package(GLEW REQUIRED) + +#add Qt OpenGL stuff +#set(QT_USE_QTOPENGL TRUE) + +#ask the user for the RTS location +set(RTS_ROOT_PATH $ENV{RTS_ROOT_PATH}) +find_package(RTS REQUIRED) + +#set the include directories +include_directories( + ${CMAKE_CURRENT_BINARY_DIR} + ${QT_INCLUDES} + ${QT_QTOPENGL_INCLUDE_DIR} +# ${OPENGL_INCLUDE_DIR} +# ${GLEW_INCLUDE_PATH} +# ${GLUT_INCLUDE_DIR} + ${RTS_INCLUDE_DIR} + ${Boost_INCLUDE_DIR} +) + +#enable warnings +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) + add_definitions(-Wall) +endif() + +#Assign source files to the appropriate variables +file(GLOB SRC_CPP "*.cpp") +file(GLOB SRC_H "*.h") +#file(GLOB SRC_UI "*.ui") +#file(GLOB SRC_QRC "*.qrc") +file(GLOB SRC_CU "*.cu") + +#set up copying data files +#configure_file(rtsSU8_k.txt ${CMAKE_CURRENT_BINARY_DIR}/rtsSU8_k.txt @ONLY) +#configure_file(rtsSU8_n.txt ${CMAKE_CURRENT_BINARY_DIR}/rtsSU8_n.txt @ONLY) +#configure_file(SurfaceMagnitude.glsl ${CMAKE_CURRENT_BINARY_DIR}/SurfaceMagnitude.glsl @ONLY) +#configure_file(SurfaceDeform.glsl ${CMAKE_CURRENT_BINARY_DIR}/SurfaceDeform.glsl @ONLY) + +#determine which source files have to be moc'd +#Qt4_wrap_cpp(UI_MOC ${SRC_H}) +#Qt4_wrap_ui(UI_H ${SRC_UI}) +#Qt4_add_resources(ALL_RCC ${ALL_QRC}) + +#moc the necessary files +#Qt4_automoc(${ALL_CPP}) + +#source_group(QtMoc FILES ${UI_MOC}) +#source_group(QtUI FILES ${SRC_UI}) + +#create an executable +cuda_add_executable(bimsim + ${SRC_CPP} + ${SRC_H} +# ${UI_H} +# ${UI_MOC} +# ${ALL_RCC} + ${SRC_CU} +) + +#set the link libraries +target_link_libraries(bimsim ${QT_LIBRARIES} ${QT_QTOPENGL_LIBRARY} ${OPENGL_gl_LIBRARY} ${OPENGL_glu_LIBRARY} ${GLEW_LIBRARY} ${CUDA_cufft_LIBRARY} ${CUDA_cublas_LIBRARY} ${GLUT_glut_LIBRARY} ${Boost_LIBRARIES}) + + + diff --git a/FindCUDASDK.cmake b/FindCUDASDK.cmake new file mode 100644 index 0000000..f3bee90 --- /dev/null +++ b/FindCUDASDK.cmake @@ -0,0 +1,142 @@ +# +# The script defines the following variables: +# +############################################################################## +# Note: Removed everything related to CUDA_SDK_ROOT_DIR and only left this as +# a possible environment variable to set the SDK directory. +# Include file will be: CUDA_CUT_INCLUDE_DIR +# Cutil library: CUDA_CUT_LIBRARY +############################################################################## +# +# +# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the +# SDK. This script will not directly support finding +# specific libraries or headers, as that isn't +# supported by NVIDIA. If you want to change +# libraries when the path changes see the +# FindCUDA.cmake script for an example of how to clear +# these variables. There are also examples of how to +# use the CUDA_SDK_ROOT_DIR to locate headers or +# libraries, if you so choose (at your own risk). +# +# This code is licensed under the MIT License. See the FindCUDASDK.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +############################################################################### + +# FindCUDASDK.cmake + +# # Check to see if the CUDA_SDK_ROOT_DIR has changed, +# # if it has then clear the cache variable, so that it will be detected again. +# if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") +# # No specific variables to catch. Use this kind of code before calling +# # find_package(CUDA) to clean up any variables that may depend on this path. +# +# # unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE) +# # unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE) +# endif() +# +# ######################## +# # Look for the SDK stuff +# find_path(CUDA_SDK_ROOT_DIR cutil.h +# PATH_SUFFIXES "common/inc" "C/common/inc" +# "$ENV{NVSDKCUDA_ROOT}" +# "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" +# "/Developer/GPU\ Computing/C" +# ) +# +# # fallback method for determining CUDA_SDK_ROOT_DIR in case the previous one failed! +# if (NOT CUDA_SDK_ROOT_DIR) +# find_path(CUDA_SDK_ROOT_DIR C/common/inc/cutil.h +# "$ENV{NVSDKCUDA_ROOT}" +# "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" +# "/Developer/GPU\ Computing/C" +# ) +# endif() + +# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the +# environment variables. +set(CUDA_SDK_SEARCH_PATH + "${CUDA_SDK_ROOT_DIR}" + "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" + "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2" + "${CUDA_TOOLKIT_ROOT_DIR}/NV_SDK" + "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK" + "$ENV{HOME}/NVIDIA_CUDA_SDK" + "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX" + "$ENV{HOME}/NVIDIA_GPU_Computing_SDK" + "/Developer/CUDA" + ) + +# Find include file from the CUDA_SDK_SEARCH_PATH + +find_path(CUDA_CUT_INCLUDE_DIR + cutil.h + PATHS ${CUDA_SDK_SEARCH_PATH} + PATH_SUFFIXES "common/inc" "C/common/inc" + DOC "Location of cutil.h" + NO_DEFAULT_PATH + ) +# Now search system paths +find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h") + +# mark_as_advanced(CUDA_CUT_INCLUDE_DIR) + + +# Example of how to find a library in the CUDA_SDK_ROOT_DIR + +# cutil library is called cutil64 for 64 bit builds on windows. We don't want +# to get these confused, so we are setting the name based on the word size of +# the build. + +# New library might be called cutil_x86_64 ! + +if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(cuda_cutil_name cutil64) +else(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(cuda_cutil_name cutil32) +endif(CMAKE_SIZEOF_VOID_P EQUAL 8) + +find_library(CUDA_CUT_LIBRARY + NAMES ${cuda_cutil_name} cutil cutil_x86_64 cutil_i386 + PATHS ${CUDA_SDK_SEARCH_PATH} + # The new version of the sdk shows up in common/lib, but the old one is in lib + # The very newest installation Path of the SDK is in subdirectory 'C'. Please add this Path to the possible suffixes. + PATH_SUFFIXES "C/lib" "common/lib" "lib" "C/common/lib" "common/lib" + DOC "Location of cutil library" + NO_DEFAULT_PATH + ) +# # Now search system paths +# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library") +mark_as_advanced(CUDA_CUT_LIBRARY) +set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY}) + +############################# +# Check for required components +if(CUDA_CUT_INCLUDE_DIR) + set(CUDASDK_FOUND TRUE) +endif(CUDA_CUT_INCLUDE_DIR) + +# set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL +# "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) diff --git a/FindGLEW.cmake b/FindGLEW.cmake new file mode 100644 index 0000000..9fe32e8 --- /dev/null +++ b/FindGLEW.cmake @@ -0,0 +1,51 @@ +# +# Try to find GLEW library and include path. +# Once done this will define +# +# GLEW_FOUND +# GLEW_INCLUDE_PATH +# GLEW_LIBRARY +# + +IF (WIN32) + FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h + $ENV{PROGRAMFILES}/GLEW/include + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/include + DOC "The directory where GL/glew.h resides") + FIND_LIBRARY( GLEW_LIBRARY + NAMES glew GLEW glew32 glew32s + PATHS + $ENV{PROGRAMFILES}/GLEW/lib + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/bin + ${PROJECT_SOURCE_DIR}/src/nvgl/glew/lib + DOC "The GLEW library") +ELSE (WIN32) + FIND_PATH( GLEW_INCLUDE_PATH GL/glew.h + /usr/include + /usr/local/include + /sw/include + /opt/local/include + DOC "The directory where GL/glew.h resides") + FIND_LIBRARY( GLEW_LIBRARY + NAMES GLEW glew + PATHS + /usr/lib64 + /usr/lib + /usr/local/lib64 + /usr/local/lib + /sw/lib + /opt/local/lib + DOC "The GLEW library") +ENDIF (WIN32) + +IF (GLEW_INCLUDE_PATH) + SET( GLEW_FOUND 1 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") +ELSE (GLEW_INCLUDE_PATH) + SET( GLEW_FOUND 0 CACHE STRING "Set to 1 if GLEW is found, 0 otherwise") +ENDIF (GLEW_INCLUDE_PATH) + +MARK_AS_ADVANCED( + GLEW_FOUND + GLEW_INCLUDE_PATH + GLEW_LIBRARY +) \ No newline at end of file diff --git a/FindRTS.cmake b/FindRTS.cmake new file mode 100644 index 0000000..fbfcc44 --- /dev/null +++ b/FindRTS.cmake @@ -0,0 +1,14 @@ +# Tries to find the RTS include directory + + FIND_PATH( RTS_INCLUDE_DIR NAMES rts_glShaderProgram.h + PATHS + ${CMAKE_CURRENT_SOURCE_DIR}/rts + ${RTS_ROOT_PATH} +) + +IF (RTS_FOUND) + #The following deprecated settings are for backwards compatibility with CMake1.4 + SET (RTS_INCLUDE_PATH ${RTS_INCLUDE_DIR}) +ENDIF(RTS_FOUND) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS(RTS REQUIRED_VARS TRUE RTS_INCLUDE_DIR) diff --git a/README.md b/README.md new file mode 100644 index 0000000..9aefa3e --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +BIM-Sim computes the interactions between an incident electromagnetic field and spherical scatterers using Mie theory. + +A Windows 32 executable is provided in the Win32 directory. All of the necessary runtime libraries are included. Start by running 'ricalc -h' for command-line options. + +This software is designed to be used with CMake, so we recommend using it to build the application. + +This software requires the following libraries for compiling: + +CUDA (including CUFFT and CUBLAS) - NVidia's GPU programming toolkit -- https://developer.nvidia.com/ + +RTS - Real-Time Scraps C++ library (my codebase) -- https://github.com/dmayerich/RTS + +Boost C++ Library (specifically the Program Options library) -- http://www.boost.org/ + +Qt (Nokia's GUI Library) -- https://qt-project.org/ + + + + + + diff --git a/bessel.h b/bessel.h new file mode 100644 index 0000000..a9055df --- /dev/null +++ b/bessel.h @@ -0,0 +1,51 @@ +#ifndef bessH +#define bessH +#define _USE_MATH_DEFINES +#include +#include +using namespace std; +#define eps 1e-15 +#define el 0.5772156649015329 + +int msta1(double x,int mp); +int msta2(double x,int n,int mp); +int bessjy01a(double x,double &j0,double &j1,double &y0,double &y1, + double &j0p,double &j1p,double &y0p,double &y1p); +int bessjy01b(double x,double &j0,double &j1,double &y0,double &y1, + double &j0p,double &j1p,double &y0p,double &y1p); +int bessjyna(int n,double x,int &nm,double *jn,double *yn, + double *jnp,double *ynp); +int bessjynb(int n,double x,int &nm,double *jn,double *yn, + double *jnp,double *ynp); +int bessjyv(double v,double x,double &vm,double *jv,double *yv, + double *jvp,double *yvp); +int bessik01a(double x,double &i0,double &i1,double &k0,double &k1, + double &i0p,double &i1p,double &k0p,double &k1p); +int bessik01b(double x,double &i0,double &i1,double &k0,double &k1, + double &i0p,double &i1p,double &k0p,double &k1p); +int bessikna(int n,double x,int &nm,double *in,double *kn, + double *inp,double *knp); +int bessiknb(int n,double x,int &nm,double *in,double *kn, + double *inp,double *knp); +int bessikv(double v,double x,double &vm,double *iv,double *kv, + double *ivp,double *kvp); +int cbessjy01(complex z,complex &cj0,complex &cj1, + complex &cy0,complex &cy1,complex &cj0p, + complex &cj1p,complex &cy0p,complex &cy1p); +int cbessjyna(int n,complex z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp); +int cbessjynb(int n,complex z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp); +int cbessik01(complexz,complex&ci0,complex&ci1, + complex&ck0,complex&ck1,complex&ci0p, + complex&ci1p,complex&ck0p,complex&ck1p); +int cbessikna(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp); +int cbessiknb(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp); +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); +int cbessikv(double v,complexz,double &vm,complex *civ, + complex *ckv,complex *civp,complex *ckvp); + +#endif diff --git a/bessik.cpp b/bessik.cpp new file mode 100644 index 0000000..322000b --- /dev/null +++ b/bessik.cpp @@ -0,0 +1,487 @@ +// bessik.cpp -- computation of modified Bessel functions In, Kn +// and their derivatives. Algorithms and coefficient values from +// "Computation of Special Functions", Zhang and Jin, John +// Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +#define _USE_MATH_DEFINES +#include +#include "bessel.h" + +double gamma(double x); + +int bessik01a(double x,double &i0,double &i1,double &k0,double &k1, + double &i0p,double &i1p,double &k0p,double &k1p) +{ + double r,x2,ca,cb,ct,ww,w0,xr,xr2; + int k,kz; + static double a[] = { + 0.125, + 7.03125e-2, + 7.32421875e-2, + 1.1215209960938e-1, + 2.2710800170898e-1, + 5.7250142097473e-1, + 1.7277275025845, + 6.0740420012735, + 2.4380529699556e1, + 1.1001714026925e2, + 5.5133589612202e2, + 3.0380905109224e3}; + static double b[] = { + -0.375, + -1.171875e-1, + -1.025390625e-1, + -1.4419555664063e-1, + -2.7757644653320e-1, + -6.7659258842468e-1, + -1.9935317337513, + -6.8839142681099, + -2.7248827311269e1, + -1.2159789187654e2, + -6.0384407670507e2, + -3.3022722944809e3}; + static double a1[] = { + 0.125, + 0.2109375, + 1.0986328125, + 1.1775970458984e1, + 2.1461706161499e2, + 5.9511522710323e3, + 2.3347645606175e5, + 1.2312234987631e7}; + + if (x < 0.0) return 1; + if (x == 0.0) { + i0 = 1.0; + i1 = 0.0; + k0 = 1e308; + k1 = 1e308; + i0p = 0.0; + i1p = 0.5; + k0p = -1e308; + k1p = -1e308; + return 0; + } + x2 = x*x; + if (x <= 18.0) { + i0 = 1.0; + r = 1.0; + for (k=1;k<=50;k++) { + r *= 0.25*x2/(k*k); + i0 += r; + if (fabs(r/i0) < eps) break; + } + i1 = 1.0; + r = 1.0; + for (k=1;k<=50;k++) { + r *= 0.25*x2/(k*(k+1)); + i1 += r; + if (fabs(r/i1) < eps) break; + } + i1 *= 0.5*x; + } + else { + if (x >= 50.0) kz = 7; + else if (x >= 35.0) kz = 9; + else kz = 12; + ca = exp(x)/sqrt(2.0*M_PI*x); + i0 = 1.0; + xr = 1.0/x; + for (k=0;k 40.0) && (n < (int)(0.25*x))) { + h0 = bi0; + h1 = bi1; + for (k=2;k<=n;k++) { + h = -2.0*(k-1.0)*h1/x+h0; + in[k] = h; + h0 = h1; + h1 = h; + } + } + else { + m = msta1(x,200); + if (m < n) nm = m; + else m = msta2(x,n,15); + f0 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(k+1.0)*f1/x+f0; + if (x <= nm) in[k] = f; + f0 = f1; + f1 = f; + } + s0 = bi0/f; + for (k=0;k<=m;k++) { + in[k] *= s0; + } + } + g0 = bk0; + g1 = bk1; + for (k=2;k<=nm;k++) { + g = 2.0*(k-1.0)*g1/x+g0; + kn[k] = g; + g0 = g1; + g1 = g; + } + for (k=2;k<=nm;k++) { + inp[k] = in[k-1]-k*in[k]/x; + knp[k] = -kn[k-1]-k*kn[k]/x; + } + return 0; +} +int bessiknb(int n,double x,int &nm,double *in,double *kn, + double *inp,double *knp) +{ + double s0,bs,f,f0,f1,sk0,a0,bkl,vt,r,g,g0,g1; + int k,kz,m,l; + + if ((x < 0.0) || (n < 0)) return 1; + if (x < eps) { + for (k=0;k<=n;k++) { + in[k] = 0.0; + kn[k] = 1e308; + inp[k] = 0.0; + knp[k] = -1e308; + } + in[0] = 1.0; + inp[1] = 0.5; + return 0; + } + nm = n; + if (n == 0) nm = 1; + m = msta1(x,200); + if (m < nm) nm = m; + else m = msta2(x,nm,15); + bs = 0.0; + sk0 = 0.0; + f0 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(k+1.0)*f1/x+f0; + if (k <= nm) in[k] = f; + if ((k != 0) && (k == 2*(int)(k/2))) { + sk0 += 4.0*f/k; + } + bs += 2.0*f; + f0 = f1; + f1 = f; + } + s0 = exp(x)/(bs-f); + for (k=0;k<=nm;k++) { + in[k] *= s0; + } + if (x <= 8.0) { + kn[0] = -(log(0.5*x)+el)*in[0]+s0*sk0; + kn[1] = (1.0/x-in[1]*kn[0])/in[0]; + } + else { + a0 = sqrt(M_PI_2/x)*exp(-x); + if (x >= 200.0) kz = 6; + else if (x >= 80.0) kz = 8; + else if (x >= 25.0) kz = 10; + else kz = 16; + for (l=0;l<2;l++) { + bkl = 1.0; + vt = 4.0*l; + r = 1.0; + for (k=1;k<=kz;k++) { + r *= 0.125*(vt-pow(2.0*k-1.0,2))/(k*x); + bkl += r; + } + kn[l] = a0*bkl; + } + } + g0 = kn[0]; + g1 = kn[1]; + for (k=2;k<=nm;k++) { + g = 2.0*(k-1.0)*g1/x+g0; + kn[k] = g; + g0 = g1; + g1 = g; + } + inp[0] = in[1]; + knp[0] = -kn[1]; + for (k=1;k<=nm;k++) { + inp[k] = in[k-1]-k*in[k]/x; + knp[k] = -kn[k-1]-k*kn[k]/x; + } + return 0; +} + +// The following program computes the modified Bessel functions +// Iv(x) and Kv(x) for arbitrary positive order. For negative +// order use: +// +// I-v(x) = Iv(x) + 2/pi sin(v pi) Kv(x) +// K-v(x) = Kv(x) +// +int bessikv(double v,double x,double &vm,double *iv,double *kv, + double *ivp,double *kvp) +{ + double x2,v0,piv,vt,a1,v0p,gap,r,bi0,ca,sum; + double f,f1,f2,ct,cs,wa,gan,ww,w0,v0n; + double r1,r2,bk0,bk1,bk2,a2,cb; + int n,k,kz,m; + + if ((v < 0.0) || (x < 0.0)) return 1; + x2 = x*x; + n = (int)v; + v0 = v-n; + if (n == 0) n = 1; + if (x == 0.0) { + for (k=0;k<=n;k++) { + iv[k] = 0.0; + kv[k] = -1e308; + ivp[k] = 0.0; + kvp[k] = 1e308; + } + if (v0 == 0.0) { + iv[0] = 1.0; + ivp[1] = 0.5; + } + vm = v; + return 0; + } + piv = M_PI*v0; + vt = 4.0*v0*v0; + if (v0 == 0.0) { + a1 = 1.0; + } + else { + v0p = 1.0+v0; + gap = gamma(v0p); + a1 = pow(0.5*x,v0)/gap; + } + if (x >= 50.0) kz = 8; + else if (x >= 35.0) kz = 10; + else kz = 14; + if (x <= 18.0) { + bi0 = 1.0; + r = 1.0; + for (k=1;k<=30;k++) { + r *= 0.25*x2/(k*(k+v0)); + bi0 += r; + if (fabs(r/bi0) < eps) break; + } + bi0 *= a1; + } + else { + ca = exp(x)/sqrt(2.0*M_PI*x); + sum = 1.0; + r = 1.0; + for (k=1;k<=kz;k++) { + r *= -0.125*(vt-pow(2.0*k-1.0,2.0))/(k*x); + sum += r; + } + bi0 = ca*sum; + } + m = msta1(x,200); + if (m < n) n = m; + else m = msta2(x,n,15); + f2 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(v0+k+1.0)*f1/x+f2; + if (k <= n) iv[k] = f; + f2 = f1; + f1 = f; + } + cs = bi0/f; + for (k=0;k<=n;k++) { + iv[k] *= cs; + } + ivp[0] = v0*iv[0]/x+iv[1]; + for (k=1;k<=n;k++) { + ivp[k] = -(k+v0)*iv[k]/x+iv[k-1]; + } + ww = 0.0; + if (x <= 9.0) { + if (v0 == 0.0) { + ct = -log(0.5*x)-el; + cs = 0.0; + w0 = 0.0; + r = 1.0; + for (k=1;k<=50;k++) { + w0 += 1.0/k; + r *= 0.25*x2/(k*k); + cs += r*(w0+ct); + wa = fabs(cs); + if (fabs((wa-ww)/wa) < eps) break; + ww = wa; + } + bk0 = ct+cs; + } + else { + v0n = 1.0-v0; + gan = gamma(v0n); + a2 = 1.0/(gan*pow(0.5*x,v0)); + a1 = pow(0.5*x,v0)/gap; + sum = a2-a1; + r1 = 1.0; + r2 = 1.0; + for (k=1;k<=120;k++) { + r1 *= 0.25*x2/(k*(k-v0)); + r2 *= 0.25*x2/(k*(k+v0)); + sum += a2*r1-a1*r2; + wa = fabs(sum); + if (fabs((wa-ww)/wa) < eps) break; + ww = wa; + } + bk0 = M_PI_2*sum/sin(piv); + } + } + else { + cb = exp(-x)*sqrt(M_PI_2/x); + sum = 1.0; + r = 1.0; + for (k=1;k<=kz;k++) { + r *= 0.125*(vt-pow(2.0*k-1.0,2.0))/(k*x); + sum += r; + } + bk0 = cb*sum; + } + bk1 = (1.0/x-iv[1]*bk0)/iv[0]; + kv[0] = bk0; + kv[1] = bk1; + for (k=2;k<=n;k++) { + bk2 = 2.0*(v0+k-1.0)*bk1/x+bk0; + kv[k] = bk2; + bk0 = bk1; + bk1 = bk2; + } + kvp[0] = v0*kv[0]/x-kv[1]; + for (k=1;k<=n;k++) { + kvp[k] = -(k+v0)*kv[k]/x-kv[k-1]; + } + vm = n+v0; + return 0; +} diff --git a/bessjy.cpp b/bessjy.cpp new file mode 100644 index 0000000..2dc98af --- /dev/null +++ b/bessjy.cpp @@ -0,0 +1,706 @@ +// bessjy.cpp -- computation of Bessel functions Jn, Yn and their +// derivatives. Algorithms and coefficient values from +// "Computation of Special Functions", Zhang and Jin, John +// Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +// Note that 'math.h' provides (or should provide) values for: +// pi M_PI +// 2/pi M_2_PI +// pi/4 M_PI_4 +// pi/2 M_PI_2 +// +#define _USE_MATH_DEFINES +#include +#include "bessel.h" + +double gamma(double x); +// +// INPUT: +// double x -- argument of Bessel function +// +// OUTPUT (via address rtsPointers): +// double j0 -- Bessel function of 1st kind, 0th order +// double j1 -- Bessel function of 1st kind, 1st order +// double y0 -- Bessel function of 2nd kind, 0th order +// double y1 -- Bessel function of 2nd kind, 1st order +// double j0p -- derivative of Bessel function of 1st kind, 0th order +// double j1p -- derivative of Bessel function of 1st kind, 1st order +// double y0p -- derivative of Bessel function of 2nd kind, 0th order +// double y1p -- derivative of Bessel function of 2nd kind, 1st order +// +// RETURN: +// int error code: 0 = OK, 1 = error +// +// This algorithm computes the above functions using series expansions. +// +int bessjy01a(double x,double &j0,double &j1,double &y0,double &y1, + double &j0p,double &j1p,double &y0p,double &y1p) +{ + double x2,r,ec,w0,w1,r0,r1,cs0,cs1; + double cu,p0,q0,p1,q1,t1,t2; + int k,kz; + static double a[] = { + -7.03125e-2, + 0.112152099609375, + -0.5725014209747314, + 6.074042001273483, + -1.100171402692467e2, + 3.038090510922384e3, + -1.188384262567832e5, + 6.252951493434797e6, + -4.259392165047669e8, + 3.646840080706556e10, + -3.833534661393944e12, + 4.854014686852901e14, + -7.286857349377656e16, + 1.279721941975975e19}; + static double b[] = { + 7.32421875e-2, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1, + 5.513358961220206e2, + -1.825775547429318e4, + 8.328593040162893e5, + -5.006958953198893e7, + 3.836255180230433e9, + -3.649010818849833e11, + 4.218971570284096e13, + -5.827244631566907e15, + 9.476288099260110e17, + -1.792162323051699e20}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947, + 1.215978918765359e2, + -3.302272294480852e3, + 1.276412726461746e5, + -6.656367718817688e6, + 4.502786003050393e8, + -3.833857520742790e10, + 4.011838599133198e12, + -5.060568503314727e14, + 7.572616461117958e16, + -1.326257285320556e19}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1, + -6.038440767050702e2, + 1.971837591223663e4, + -8.902978767070678e5, + 5.310411010968522e7, + -4.043620325107754e9, + 3.827011346598605e11, + -4.406481417852278e13, + 6.065091351222699e15, + -9.833883876590679e17, + 1.855045211579828e20}; + + if (x < 0.0) return 1; + if (x == 0.0) { + j0 = 1.0; + j1 = 0.0; + y0 = -1e308; + y1 = -1e308; + j0p = 0.0; + j1p = 0.5; + y0p = 1e308; + y1p = 1e308; + return 0; + } + x2 = x*x; + if (x <= 12.0) { + j0 = 1.0; + r = 1.0; + for (k=1;k<=30;k++) { + r *= -0.25*x2/(k*k); + j0 += r; + if (fabs(r) < fabs(j0)*1e-15) break; + } + j1 = 1.0; + r = 1.0; + for (k=1;k<=30;k++) { + r *= -0.25*x2/(k*(k+1)); + j1 += r; + if (fabs(r) < fabs(j1)*1e-15) break; + } + j1 *= 0.5*x; + ec = log(0.5*x)+el; + cs0 = 0.0; + w0 = 0.0; + r0 = 1.0; + for (k=1;k<=30;k++) { + w0 += 1.0/k; + r0 *= -0.25*x2/(k*k); + r = r0 * w0; + cs0 += r; + if (fabs(r) < fabs(cs0)*1e-15) break; + } + y0 = M_2_PI*(ec*j0-cs0); + cs1 = 1.0; + w1 = 0.0; + r1 = 1.0; + for (k=1;k<=30;k++) { + w1 += 1.0/k; + r1 *= -0.25*x2/(k*(k+1)); + r = r1*(2.0*w1+1.0/(k+1)); + cs1 += r; + if (fabs(r) < fabs(cs1)*1e-15) break; + } + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1); + } + else { + if (x >= 50.0) kz = 8; // Can be changed to 10 + else if (x >= 35.0) kz = 10; // " " 12 + else kz = 12; // " " 14 + t1 = x-M_PI_4; + p0 = 1.0; + q0 = -0.125/x; + for (k=0;k=0;k--) { + f = 2.0*(k+1.0)/x*f1-f2; + if (k <= nm) jn[k] = f; + f2 = f1; + f1 = f; + } + if (fabs(bj0) > fabs(bj1)) cs = bj0/f; + else cs = bj1/f2; + for (k=0;k<=nm;k++) { + jn[k] *= cs; + } + } + for (k=2;k<=nm;k++) { + jnp[k] = jn[k-1]-k*jn[k]/x; + } + f0 = yn[0]; + f1 = yn[1]; + for (k=2;k<=nm;k++) { + f = 2.0*(k-1.0)*f1/x-f0; + yn[k] = f; + f0 = f1; + f1 = f; + } + for (k=2;k<=nm;k++) { + ynp[k] = yn[k-1]-k*yn[k]/x; + } + return 0; +} +// +// Same input and output conventions as above. Different recurrence +// relations used for 'x' < 300. +// +int bessjynb(int n,double x,int &nm,double *jn,double *yn, + double *jnp,double *ynp) +{ + double t1,t2,f,f1,f2,bj0,bj1,bjk,by0,by1,cu,s0,su,sv; + double ec,bs,byk,p0,p1,q0,q1; + static double a[] = { + -0.7031250000000000e-1, + 0.1121520996093750, + -0.5725014209747314, + 6.074042001273483}; + static double b[] = { + 0.7324218750000000e-1, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1}; + + int i,k,m; + nm = n; + if ((x < 0.0) || (n < 0)) return 1; + if (x < 1e-15) { + for (i=0;i<=n;i++) { + jn[i] = 0.0; + yn[i] = -1e308; + jnp[i] = 0.0; + ynp[i] = 1e308; + } + jn[0] = 1.0; + jnp[1] = 0.5; + return 0; + } + if (x <= 300.0 || n > (int)(0.9*x)) { + if (n == 0) nm = 1; + m = msta1(x,200); + if (m < nm) nm = m; + else m = msta2(x,nm,15); + bs = 0.0; + su = 0.0; + sv = 0.0; + f2 = 0.0; + f1 = 1.0e-100; + for (k = m;k>=0;k--) { + f = 2.0*(k+1.0)/x*f1 - f2; + if (k <= nm) jn[k] = f; + if ((k == 2*(int)(k/2)) && (k != 0)) { + bs += 2.0*f; +// su += pow(-1,k>>1)*f/(double)k; + su += (-1)*((k & 2)-1)*f/(double)k; + } + else if (k > 1) { +// sv += pow(-1,k>>1)*k*f/(k*k-1.0); + sv += (-1)*((k & 2)-1)*(double)k*f/(k*k-1.0); + } + f2 = f1; + f1 = f; + } + s0 = bs+f; + for (k=0;k<=nm;k++) { + jn[k] /= s0; + } + ec = log(0.5*x) +0.5772156649015329; + by0 = M_2_PI*(ec*jn[0]-4.0*su/s0); + yn[0] = by0; + by1 = M_2_PI*((ec-1.0)*jn[1]-jn[0]/x-4.0*sv/s0); + yn[1] = by1; + } + else { + t1 = x-M_PI_4; + p0 = 1.0; + q0 = -0.125/x; + for (k=0;k<4;k++) { + p0 += a[k]*pow(x,-2*k-2); + q0 += b[k]*pow(x,-2*k-3); + } + cu = sqrt(M_2_PI/x); + bj0 = cu*(p0*cos(t1)-q0*sin(t1)); + by0 = cu*(p0*sin(t1)+q0*cos(t1)); + jn[0] = bj0; + yn[0] = by0; + t2 = x-0.75*M_PI; + p1 = 1.0; + q1 = 0.375/x; + for (k=0;k<4;k++) { + p1 += a1[k]*pow(x,-2*k-2); + q1 += b1[k]*pow(x,-2*k-3); + } + bj1 = cu*(p1*cos(t2)-q1*sin(t2)); + by1 = cu*(p1*sin(t2)+q1*cos(t2)); + jn[1] = bj1; + yn[1] = by1; + for (k=2;k<=nm;k++) { + bjk = 2.0*(k-1.0)*bj1/x-bj0; + jn[k] = bjk; + bj0 = bj1; + bj1 = bjk; + } + } + jnp[0] = -jn[1]; + for (k=1;k<=nm;k++) { + jnp[k] = jn[k-1]-k*jn[k]/x; + } + for (k=2;k<=nm;k++) { + byk = 2.0*(k-1.0)*by1/x-by0; + yn[k] = byk; + by0 = by1; + by1 = byk; + } + ynp[0] = -yn[1]; + for (k=1;k<=nm;k++) { + ynp[k] = yn[k-1]-k*yn[k]/x; + } + return 0; + +} + +// The following routine computes Bessel Jv(x) and Yv(x) for +// arbitrary positive order (v). For negative order, use: +// +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi) +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi) +// +int bessjyv(double v,double x,double &vm,double *jv,double *yv, + double *djv,double *dyv) +{ + double v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2; + double r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq; + double b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; + int j,k,l,m,n,kz; + + x2 = x*x; + n = (int)v; + v0 = v-n; + if ((x < 0.0) || (v < 0.0)) return 1; + if (x < 1e-15) { + for (k=0;k<=n;k++) { + jv[k] = 0.0; + yv[k] = -1e308; + djv[k] = 0.0; + dyv[k] = 1e308; + if (v0 == 0.0) { + jv[0] = 1.0; + djv[1] = 0.5; + } + else djv[0] = 1e308; + } + vm = v; + return 0; + } + if (x <= 12.0) { + for (l=0;l<2;l++) { + vl = v0 + l; + bjvl = 1.0; + r = 1.0; + for (k=1;k<=40;k++) { + r *= -0.25*x2/(k*(k+vl)); + bjvl += r; + if (fabs(r) < fabs(bjvl)*1e-15) break; + } + vg = 1.0 + vl; + a = pow(0.5*x,vl)/gamma(vg); + if (l == 0) bjv0 = bjvl*a; + else bjv1 = bjvl*a; + } + } + else { + if (x >= 50.0) kz = 8; + else if (x >= 35.0) kz = 10; + else kz = 11; + for (j=0;j<2;j++) { + vv = 4.0*(j+v0)*(j+v0); + px = 1.0; + rp = 1.0; + for (k=1;k<=kz;k++) { + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))* + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2); + px += rp; + } + qx = 1.0; + rq = 1.0; + for (k=1;k<=kz;k++) { + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))* + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2); + qx += rq; + } + qx *= 0.125*(vv-1.0)/x; + xk = x-(0.5*(j+v0)+0.25)*M_PI; + a0 = sqrt(M_2_PI/x); + ck = cos(xk); + sk = sin(xk); + + if (j == 0) { + bjv0 = a0*(px*ck-qx*sk); + byv0 = a0*(px*sk+qx*ck); + } + else if (j == 1) { + bjv1 = a0*(px*ck-qx*sk); + byv1 = a0*(px*sk+qx*ck); + } + } + } + jv[0] = bjv0; + jv[1] = bjv1; + djv[0] = v0*jv[0]/x-jv[1]; + djv[1] = -(1.0+v0)*jv[1]/x+jv[0]; + if ((n >= 2) && (n <= (int)(0.9*x))) { + f0 = bjv0; + f1 = bjv1; + for (k=2;k<=n;k++) { + f = 2.0*(k+v0-1.0)*f1/x-f0; + jv[k] = f; + f0 = f1; + f1 = f; + } + } + else if (n >= 2) { + m = msta1(x,200); + if (m < n) n = m; + else m = msta2(x,n,15); + f2 = 0.0; + f1 = 1.0e-100; + for (k=m;k>=0;k--) { + f = 2.0*(v0+k+1.0)*f1/x-f2; + if (k <= n) jv[k] = f; + f2 = f1; + f1 = f; + } + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f; + else cs = bjv1/f2; + for (k=0;k<=n;k++) { + jv[k] *= cs; + } + } + for (k=2;k<=n;k++) { + djv[k] = -(k+v0)*jv[k]/x+jv[k-1]; + } + if (x <= 12.0) { + if (v0 != 0.0) { + for (l=0;l<2;l++) { + vl = v0 +l; + bjvl = 1.0; + r = 1.0; + for (k=1;k<=40;k++) { + r *= -0.25*x2/(k*(k-vl)); + bjvl += r; + if (fabs(r) < fabs(bjvl)*1e-15) break; + } + vg = 1.0-vl; + b = pow(2.0/x,vl)/gamma(vg); + if (l == 0) bju0 = bjvl*b; + else bju1 = bjvl*b; + } + pv0 = M_PI*v0; + pv1 = M_PI*(1.0+v0); + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0); + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1); + } + else { + ec = log(0.5*x)+el; + cs0 = 0.0; + w0 = 0.0; + r0 = 1.0; + for (k=1;k<=30;k++) { + w0 += 1.0/k; + r0 *= -0.25*x2/(k*k); + cs0 += r0*w0; + } + byv0 = M_2_PI*(ec*bjv0-cs0); + cs1 = 1.0; + w1 = 0.0; + r1 = 1.0; + for (k=1;k<=30;k++) { + w1 += 1.0/k; + r1 *= -0.25*x2/(k*(k+1)); + cs1 += r1*(2.0*w1+1.0/(k+1.0)); + } + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1); + } + } + yv[0] = byv0; + yv[1] = byv1; + for (k=2;k<=n;k++) { + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0; + yv[k] = byvk; + byv0 = byv1; + byv1 = byvk; + } + dyv[0] = v0*yv[0]/x-yv[1]; + for (k=1;k<=n;k++) { + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1]; + } + vm = n + v0; + return 0; +} + diff --git a/cbessik.cpp b/cbessik.cpp new file mode 100644 index 0000000..524c3f2 --- /dev/null +++ b/cbessik.cpp @@ -0,0 +1,454 @@ +// cbessik.cpp -- complex modified Bessel functions. +// Algorithms and coefficient values from "Computation of Special +// Functions", Zhang and Jin, John Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +#include +using namespace std; +#include "bessel.h" + +static complex cii(0.0,1.0); +static complex czero(0.0,0.0); +static complex cone(1.0,0.0); + +double gamma(double x); + +int cbessik01(complexz,complex&ci0,complex&ci1, + complex&ck0,complex&ck1,complex&ci0p, + complex&ci1p,complex&ck0p,complex&ck1p) +{ + complex z1,z2,zr,zr2,cr,ca,cb,cs,ct,cw; + double a0,w0; + int k,kz; + static double a[] = { + 0.125, + 7.03125e-2, + 7.32421875e-2, + 1.1215209960938e-1, + 2.2710800170898e-1, + 5.7250142097473e-1, + 1.7277275025845, + 6.0740420012735, + 2.4380529699556e1, + 1.1001714026925e2, + 5.5133589612202e2, + 3.0380905109224e3}; + static double b[] = { + -0.375, + -1.171875e-1, + -1.025390625e-1, + -1.4419555664063e-1, + -2.7757644653320e-1, + -6.7659258842468e-1, + -1.9935317337513, + -6.8839142681099, + -2.7248827311269e1, + -1.2159789187654e2, + -6.0384407670507e2, + -3.3022722944809e3}; + static double a1[] = { + 0.125, + 0.2109375, + 1.0986328125, + 1.1775970458984e1, + 2.1461706161499e2, + 5.9511522710323e3, + 2.3347645606175e5, + 1.2312234987631e7, + 8.401390346421e08, + 7.2031420482627e10}; + + a0 = abs(z); + z2 = z*z; + z1 = z; + if (a0 == 0.0) { + ci0 = cone; + ci1 = czero; + ck0 = complex (1e308,0); + ck1 = complex (1e308,0); + ci0p = czero; + ci1p = complex(0.5,0.0); + ck0p = complex(-1e308,0); + ck1p = complex(-1e308,0); + return 0; + } + if (real(z) < 0.0) z1 = -z; + if (a0 <= 18.0) { + ci0 = cone; + cr = cone; + for (k=1;k<=50;k++) { + cr *= 0.25*z2/(double)(k*k); + ci0 += cr; + if (abs(cr/ci0) < eps) break; + } + ci1 = cone; + cr = cone; + for (k=1;k<=50;k++) { + cr *= 0.25*z2/(double)(k*(k+1.0)); + ci1 += cr; + if (abs(cr/ci1) < eps) break; + } + ci1 *= 0.5*z1; + } + else { + if (a0 >= 50.0) kz = 7; + else if (a0 >= 35.0) kz = 9; + else kz = 12; + ca = exp(z1)/sqrt(2.0*M_PI*z1); + ci0 = cone; + zr = 1.0/z1; + for (k=0;k 0.0) { + ck0 -= cii*M_PI*ci0; + ck1 = -ck1-cii*M_PI*ci1; + } + ci1 = -ci1; + } + ci0p = ci1; + ci1p = ci0-1.0*ci1/z; + ck0p = -ck1; + ck1p = -ck0-1.0*ck1/z; + return 0; +} +int cbessikna(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp) +{ + complex ci0,ci1,ck0,ck1,ckk,cf,cf1,cf2,cs; + double a0; + int k,m,ecode; + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + ci[k] = czero; + ck[k] = complex(-1e308,0); + cip[k] = czero; + ckp[k] = complex(1e308,0); + } + ci[0] = cone; + cip[1] = complex(0.5,0.0); + return 0; + } + ecode = cbessik01(z,ci[0],ci[1],ck[0],ck[1],cip[0],cip[1],ckp[0],ckp[1]); + if (n < 2) return 0; + ci0 = ci[0]; + ci1 = ci[1]; + ck0 = ck[0]; + ck1 = ck[1]; + m = msta1(a0,200); + if (m < n) nm = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z+cf2; + if (k <= nm) ci[k] = cf; + cf2 = cf1; + cf1 = cf; + } + cs = ci0/cf; + for (k=0;k<=nm;k++) { + ci[k] *= cs; + } + for (k=2;k<=nm;k++) { + if (abs(ci[k-1]) > abs(ci[k-2])) { + ckk = (1.0/z-ci[k]*ck[k-1])/ci[k-1]; + } + else { + ckk = (ci[k]*ck[k-2]+2.0*(k-1.0)/(z*z))/ci[k-2]; + } + ck[k] = ckk; + } + for (k=2;k<=nm;k++) { + cip[k] = ci[k-1]-(double)k*ci[k]/z; + ckp[k] = -ck[k-1]-(double)k*ck[k]/z; + } + return 0; +} +int cbessiknb(int n,complex z,int &nm,complex *ci, + complex *ck,complex *cip,complex *ckp) +{ + complex z1,cbs,csk0,cf,cf0,cf1,ca0,cbkl; + complex cg,cg0,cg1,cs0,cs,cr; + double a0,vt,fac; + int k,kz,l,m; + + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + ci[k] = czero; + ck[k] = complex(1e308,0); + cip[k] = czero; + ckp[k] = complex(-1e308,0); + } + ci[0] = complex(1.0,0.0); + cip[1] = complex(0.5,0.0); + return 0; + } + z1 = z; + if (real(z) < 0.0) z1 = -z; + if (n == 0) nm = 1; + m = msta1(a0,200); + if (m < nm) nm = m; + else m = msta2(a0,nm,15); + cbs = czero; + csk0 = czero; + cf0 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z1+cf0; + if (k <=nm) ci[k] = cf; + if ((k != 0) && (k == 2*(k>>1))) csk0 += 4.0*cf/(double)k; + cbs += 2.0*cf; + cf0 = cf1; + cf1 = cf; + } + cs0 = exp(z1)/(cbs-cf); + for (k=0;k<=nm;k++) { + ci[k] *= cs0; + } + if (a0 <= 9.0) { + ck[0] = -(log(0.5*z1)+el)*ci[0]+cs0*csk0; + ck[1] = (1.0/z1-ci[1]*ck[0])/ci[0]; + } + else { + ca0 = sqrt(M_PI_2/z1)*exp(-z1); + if (a0 >= 200.0) kz = 6; + else if (a0 >= 80.0) kz = 8; + else if (a0 >= 25.0) kz = 10; + else kz = 16; + for (l=0;l<2;l++) { + cbkl = cone; + vt = 4.0*l; + cr = cone; + for (k=1;k<=kz;k++) { + cr *= 0.125*(vt-pow(2.0*k-1.0,2.0))/((double)k*z1); + cbkl += cr; + } + ck[l] = ca0*cbkl; + } + } + cg0 = ck[0]; + cg1 = ck[1]; + for (k=2;k<=nm;k++) { + cg = 2.0*(k-1.0)*cg1/z1+cg0; + ck[k] = cg; + cg0 = cg1; + cg1 = cg; + } + if (real(z) < 0.0) { + fac = 1.0; + for (k=0;k<=nm;k++) { + if (imag(z) < 0.0) { + ck[k] = fac*ck[k]+cii*M_PI*ci[k]; + } + else { + ck[k] = fac*ck[k]-cii*M_PI*ci[k]; + } + ci[k] *= fac; + fac = -fac; + } + } + cip[0] = ci[1]; + ckp[0] = -ck[1]; + for (k=1;k<=nm;k++) { + cip[k] = ci[k-1]-(double)k*ci[k]/z; + ckp[k] = -ck[k-1]-(double)k*ck[k]/z; + } + return 0; +} +int cbessikv(double v,complexz,double &vm,complex *civ, + complex *ckv,complex *civp,complex *ckvp) +{ + complex z1,z2,ca1,ca,cs,cr,ci0,cbi0,cf,cf1,cf2; + complex ct,cp,cbk0,ca2,cr1,cr2,csu,cws,cb; + complex cg0,cg1,cgk,cbk1,cvk; + double a0,v0,v0p,v0n,vt,w0,piv,gap,gan; + int m,n,k,kz; + + a0 = abs(z); + z1 = z; + z2 = z*z; + n = (int)v; + v0 = v-n; + piv = M_PI*v0; + vt = 4.0*v0*v0; + if (n == 0) n = 1; + if (a0 < 1e-100) { + for (k=0;k<=n;k++) { + civ[k] = czero; + ckv[k] = complex(-1e308,0); + civp[k] = czero; + ckvp[k] = complex(1e308,0); + } + if (v0 == 0.0) { + civ[0] = cone; + civp[1] = complex (0.5,0.0); + } + vm = v; + return 0; + } + if (a0 >= 50.0) kz = 8; + else if (a0 >= 35.0) kz = 10; + else kz = 14; + if (real(z) <= 0.0) z1 = -z; + if (a0 < 18.0) { + if (v0 == 0.0) { + ca1 = cone; + } + else { + v0p = 1.0+v0; + gap = gamma(v0p); + ca1 = pow(0.5*z1,v0)/gap; + } + ci0 = cone; + cr = cone; + for (k=1;k<=50;k++) { + cr *= 0.25*z2/(k*(k+v0)); + ci0 += cr; + if (abs(cr/ci0) < eps) break; + } + cbi0 = ci0*ca1; + } + else { + ca = exp(z1)/sqrt(2.0*M_PI*z1); + cs = cone; + cr = cone; + for (k=1;k<=kz;k++) { + cr *= -0.125*(vt-pow(2.0*k-1.0,2.0))/((double)k*z1); + cs += cr; + } + cbi0 = ca*cs; + } + m = msta1(a0,200); + if (m < n) n = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(v0+k+1.0)*cf1/z1+cf2; + if (k <= n) civ[k] = cf; + cf2 = cf1; + cf1 = cf; + } + cs = cbi0/cf; + for (k=0;k<=n;k++) { + civ[k] *= cs; + } + if (a0 <= 9.0) { + if (v0 == 0.0) { + ct = -log(0.5*z1)-el; + cs = czero; + w0 = 0.0; + cr = cone; + for (k=1;k<=50;k++) { + w0 += 1.0/k; + cr *= 0.25*z2/(double)(k*k); + cp = cr*(w0+ct); + cs += cp; + if ((k >= 10) && (abs(cp/cs) < eps)) break; + } + cbk0 = ct+cs; + } + else { + v0n = 1.0-v0; + gan = gamma(v0n); + ca2 = 1.0/(gan*pow(0.5*z1,v0)); + ca1 = pow(0.5*z1,v0)/gap; + csu = ca2-ca1; + cr1 = cone; + cr2 = cone; + cws = czero; + for (k=1;k<=50;k++) { + cr1 *= 0.25*z2/(k*(k-v0)); + cr2 *= 0.25*z2/(k*(k+v0)); + csu += ca2*cr1-ca1*cr2; + if ((k >= 10) && (abs((cws-csu)/csu) < eps)) break; + cws = csu; + } + cbk0 = csu*M_PI_2/sin(piv); + } + } + else { + cb = exp(-z1)*sqrt(M_PI_2/z1); + cs = cone; + cr = cone; + for (k=1;k<=kz;k++) { + cr *= 0.125*(vt-pow(2.0*k-1.0,2.0))/((double)k*z1); + cs += cr; + } + cbk0 = cb*cs; + } + cbk1 = (1.0/z1-civ[1]*cbk0)/civ[0]; + ckv[0] = cbk0; + ckv[1] = cbk1; + cg0 = cbk0; + cg1 = cbk1; + for (k=2;k<=n;k++) { + cgk = 2.0*(v0+k-1.0)*cg1/z1+cg0; + ckv[k] = cgk; + cg0 = cg1; + cg1 = cgk; + } + if (real(z) < 0.0) { + for (k=0;k<=n;k++) { + cvk = exp((k+v0)*M_PI*cii); + if (imag(z) < 0.0) { + ckv[k] = cvk*ckv[k]+M_PI*cii*civ[k]; + civ[k] /= cvk; + } + else if (imag(z) > 0.0) { + ckv[k] = ckv[k]/cvk-M_PI*cii*civ[k]; + civ[k] *= cvk; + } + } + } + civp[0] = v0*civ[0]/z+civ[1]; + ckvp[0] = v0*ckv[0]/z-ckv[1]; + for (k=1;k<=n;k++) { + civp[k] = -(k+v0)*civ[k]/z+civ[k-1]; + ckvp[k] = -(k+v0)*ckv[k]/z-ckv[k-1]; + } + vm = n+v0; + return 0; +} diff --git a/cbessjy.cpp b/cbessjy.cpp new file mode 100644 index 0000000..517dc77 --- /dev/null +++ b/cbessjy.cpp @@ -0,0 +1,739 @@ +// cbessjy.cpp -- complex Bessel functions. +// Algorithms and coefficient values from "Computation of Special +// Functions", Zhang and Jin, John Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +#include +using namespace std; +#include "bessel.h" +double gamma(double); + +static complex cii(0.0,1.0); +static complex cone(1.0,0.0); +static complex czero(0.0,0.0); + +#define PI 3.14159 + +int cbessjy01(complex z,complex &cj0,complex &cj1, + complex &cy0,complex &cy1,complex &cj0p, + complex &cj1p,complex &cy0p,complex &cy1p) +{ + complex z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu; + double a0,w0,w1; + int k,kz; + + static double a[] = { + -7.03125e-2, + 0.112152099609375, + -0.5725014209747314, + 6.074042001273483, + -1.100171402692467e2, + 3.038090510922384e3, + -1.188384262567832e5, + 6.252951493434797e6, + -4.259392165047669e8, + 3.646840080706556e10, + -3.833534661393944e12, + 4.854014686852901e14, + -7.286857349377656e16, + 1.279721941975975e19}; + static double b[] = { + 7.32421875e-2, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1, + 5.513358961220206e2, + -1.825775547429318e4, + 8.328593040162893e5, + -5.006958953198893e7, + 3.836255180230433e9, + -3.649010818849833e11, + 4.218971570284096e13, + -5.827244631566907e15, + 9.476288099260110e17, + -1.792162323051699e20}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947, + 1.215978918765359e2, + -3.302272294480852e3, + 1.276412726461746e5, + -6.656367718817688e6, + 4.502786003050393e8, + -3.833857520742790e10, + 4.011838599133198e12, + -5.060568503314727e14, + 7.572616461117958e16, + -1.326257285320556e19}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1, + -6.038440767050702e2, + 1.971837591223663e4, + -8.902978767070678e5, + 5.310411010968522e7, + -4.043620325107754e9, + 3.827011346598605e11, + -4.406481417852278e13, + 6.065091351222699e15, + -9.833883876590679e17, + 1.855045211579828e20}; + + a0 = abs(z); + z2 = z*z; + z1 = z; + if (a0 == 0.0) { + cj0 = cone; + cj1 = czero; + cy0 = complex(-1e308,0); + cy1 = complex(-1e308,0); + cj0p = czero; + cj1p = complex(0.5,0.0); + cy0p = complex(1e308,0); + cy1p = complex(1e308,0); + return 0; + } + if (real(z) < 0.0) z1 = -z; + if (a0 <= 12.0) { + cj0 = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(double)(k*k); + cj0 += cr; + if (abs(cr) < abs(cj0)*eps) break; + } + cj1 = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(k*(k+1.0)); + cj1 += cr; + if (abs(cr) < abs(cj1)*eps) break; + } + cj1 *= 0.5*z1; + w0 = 0.0; + cr = cone; + cs = czero; + for (k=1;k<=40;k++) { + w0 += 1.0/k; + cr *= -0.25*z2/(double)(k*k); + cp = cr*w0; + cs += cp; + if (abs(cp) < abs(cs)*eps) break; + } + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs); + w1 = 0.0; + cr = cone; + cs = cone; + for (k=1;k<=40;k++) { + w1 += 1.0/k; + cr *= -0.25*z2/(k*(k+1.0)); + cp = cr*(2.0*w1+1.0/(k+1.0)); + cs += cp; + if (abs(cp) < abs(cs)*eps) break; + } + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs); + } + else { + if (a0 >= 50.0) kz = 8; // can be changed to 10 + else if (a0 >= 35.0) kz = 10; // " " " 12 + else kz = 12; // " " " 14 + ct1 = z1 - M_PI_4; + cp0 = cone; + for (k=0;k 0.0) { + cy0 += 2.0*cii*cj0; + cy1 = -(cy1+2.0*cii*cj1); + } + cj1 = -cj1; + } + cj0p = -cj1; + cj1p = cj0-cj1/z; + cy0p = -cy1; + cy1p = cy0-cy1/z; + return 0; +} + +int cbessjyna(int n,complex z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp) +{ + complex cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2; + complex cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22; + complex ch0,ch1,ch2; + double a0,yak,ya1,ya0,wa; + int m,k,lb,lb0; + + if (n < 0) return 1; + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + cj[k] = czero; + cy[k] = complex (-1e308,0); + cjp[k] = czero; + cyp[k] = complex(1e308,0); + } + cj[0] = cone; + cjp[1] = complex(0.5,0.0); + return 0; + } + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]); + cbj0 = cj[0]; + cbj1 = cj[1]; + cby0 = cy[0]; + cby1 = cy[1]; + if (n <= 1) return 0; + if (n < (int)0.25*a0) { + cj0 = cbj0; + cj1 = cbj1; + for (k=2;k<=n;k++) { + cjk = 2.0*(k-1.0)*cj1/z-cj0; + cj[k] = cjk; + cj0 = cj1; + cj1 = cjk; + } + } + else { + m = msta1(a0,200); + if (m < n) nm = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex (1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z-cf2; + if (k <=nm) cj[k] = cf; + cf2 = cf1; + cf1 = cf; + } + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf; + else cs = cbj1/cf2; + for (k=0;k<=nm;k++) { + cj[k] *= cs; + } + } + for (k=2;k<=nm;k++) { + cjp[k] = cj[k-1]-(double)k*cj[k]/z; + } + ya0 = abs(cby0); + lb = 0; + cg0 = cby0; + cg1 = cby1; + for (k=2;k<=nm;k++) { + cyk = 2.0*(k-1.0)*cg1/z-cg0; + yak = abs(cyk); + ya1 = abs(cg0); + if ((yak < ya0) && (yak < ya1)) lb = k; + cy[k] = cyk; + cg0 = cg1; + cg1 = cyk; + } + lb0 = 0; + if ((lb > 4) && (imag(z) != 0.0)) { + while (lb != lb0) { + ch2 = cone; + ch1 = czero; + lb0 = lb; + for (k=lb;k>=1;k--) { + ch0 = 2.0*k*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp12 = ch0; + cp22 = ch2; + ch2 = czero; + ch1 = cone; + for (k=lb;k>=1;k--) { + ch0 = 2.0*k*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp11 = ch0; + cp21 = ch2; + if (lb == nm) + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1]; + if (abs(cj[0]) > abs(cj[1])) { + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0]; + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0]; + } + else { + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1]; + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1]; + } + cyl2 = cy[lb+1]; + cyl1 = cy[lb]; + for (k=lb-1;k>=0;k--) { + cylk = 2.0*(k+1.0)*cyl1/z-cyl2; + cy[k] = cylk; + cyl2 = cyl1; + cyl1 = cylk; + } + cyl1 = cy[lb]; + cyl2 = cy[lb+1]; + for (k=lb+1;k z,int &nm,complex *cj, + complex *cy,complex *cjp,complex *cyp) +{ + complex cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce; + complex ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1; + complex cyy,cbjk,ct2; + double a0,y0; + int k,m; + static double a[] = { + -0.7031250000000000e-1, + 0.1121520996093750, + -0.5725014209747314, + 6.074042001273483}; + static double b[] = { + 0.7324218750000000e-1, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1}; + static double a1[] = { + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947}; + static double b1[] = { + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1}; + + y0 = abs(imag(z)); + a0 = abs(z); + nm = n; + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + cj[k] = czero; + cy[k] = complex (-1e308,0); + cjp[k] = czero; + cyp[k] = complex(1e308,0); + } + cj[0] = cone; + cjp[1] = complex(0.5,0.0); + return 0; + } + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) { + if (n == 0) nm = 1; + m = msta1(a0,200); + if (m < nm) nm = m; + else m = msta2(a0,nm,15); + cbs = czero; + csu = czero; + csv = czero; + cf2 = czero; + cf1 = complex (1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(k+1.0)*cf1/z-cf2; + if (k <= nm) cj[k] = cf; + if (((k & 1) == 0) && (k != 0)) { + if (y0 <= 1.0) { + cbs += 2.0*cf; + } + else { + cbs += (-1)*((k & 2)-1)*2.0*cf; + } + csu += (double)((-1)*((k & 2)-1))*cf/(double)k; + } + else if (k > 1) { + csv += (double)((-1)*((k & 2)-1)*k)*cf/(double)(k*k-1.0); + } + cf2 = cf1; + cf1 = cf; + } + if (y0 <= 1.0) cs0 = cbs+cf; + else cs0 = (cbs+cf)/cos(z); + for (k=0;k<=nm;k++) { + cj[k] /= cs0; + } + ce = log(0.5*z)+el; + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0); + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0); + } + else { + ct1 = z-M_PI_4; + cp0 = cone; + for (k=0;k<4;k++) { + cp0 += a[k]*pow(z,-2.0*k-2.0); + } + cq0 = -0.125/z; + for (k=0;k<4;k++) { + cq0 += b[k] *pow(z,-2.0*k-3.0); + } + cu = sqrt(M_2_PI/z); + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); + cj[0] = cbj0; + cy[0] = cby0; + ct2 = z-0.75*M_PI; + cp1 = cone; + for (k=0;k<4;k++) { + cp1 += a1[k]*pow(z,-2.0*k-2.0); + } + cq1 = 0.375/z; + for (k=0;k<4;k++) { + cq1 += b1[k]*pow(z,-2.0*k-3.0); + } + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); + cj[1] = cbj1; + cy[1] = cby1; + for (k=2;k<=n;k++) { + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0; + cj[k] = cbjk; + cbj0 = cbj1; + cbj1 = cbjk; + } + } + cjp[0] = -cj[1]; + for (k=1;k<=nm;k++) { + cjp[k] = cj[k-1]-(double)k*cj[k]/z; + } + if (abs(cj[0]) > 1.0) + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0]; + for (k=2;k<=nm;k++) { + if (abs(cj[k-1]) >= abs(cj[k-2])) + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1]; + else + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2]; + cy[k] = cyy; + } + cyp[0] = -cy[1]; + for (k=1;k<=nm;k++) { + cyp[k] = cy[k-1]-(double)k*cy[k]/z; + } + + return 0; +} + +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp) +{ + complex z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp; + complex cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb; + complex cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2; + complex cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22; + complex ch0,ch1,ch2,cyl1,cyl2,cylk; + + double a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; + int j,n,k,kz,l,lb,lb0,m; + + a0 = abs(z); + z1 = z; + z2 = z*z; + n = (int)v; + + + v0 = v-n; + + pv0 = M_PI*v0; + pv1 = M_PI*(1.0+v0); + if (a0 < 1.0e-100) { + for (k=0;k<=n;k++) { + cjv[k] = czero; + cyv[k] = complex (-1e308,0); + cjvp[k] = czero; + cyvp[k] = complex (1e308,0); + + } + if (v0 == 0.0) { + cjv[0] = cone; + cjvp[1] = complex (0.5,0.0); + } + else { + cjvp[0] = complex (1e308,0); + } + vm = v; + return 0; + } + if (real(z1) < 0.0) z1 = -z; + if (a0 <= 12.0) { + for (l=0;l<2;l++) { + vl = v0+l; + cjvl = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(k*(k+vl)); + cjvl += cr; + if (abs(cr) < abs(cjvl)*eps) break; + } + vg = 1.0 + vl; + ga = gamma(vg); + ca = pow(0.5*z1,vl)/ga; + if (l == 0) cjv0 = cjvl*ca; + else cjv1 = cjvl*ca; + } + } + else { + if (a0 >= 50.0) kz = 8; + else if (a0 >= 35.0) kz = 10; + else kz = 11; + for (j=0;j<2;j++) { + vv = 4.0*(j+v0)*(j+v0); + cpz = cone; + crp = cone; + for (k=1;k<=kz;k++) { + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))* + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2); + cpz += crp; + } + cqz = cone; + crq = cone; + for (k=1;k<=kz;k++) { + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))* + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2); + cqz += crq; + } + cqz *= 0.125*(vv-1.0)/z1; + zk = z1-(0.5*(j+v0)+0.25)*M_PI; + ca0 = sqrt(M_2_PI/z1); + cck = cos(zk); + csk = sin(zk); + if (j == 0) { + cjv0 = ca0*(cpz*cck-cqz*csk); + cyv0 = ca0*(cpz*csk+cqz+cck); + } + else { + cjv1 = ca0*(cpz*cck-cqz*csk); + cyv1 = ca0*(cpz*csk+cqz*cck); + } + } + } + if (a0 <= 12.0) { + if (v0 != 0.0) { + for (l=0;l<2;l++) { + vl = v0+l; + cjvl = cone; + cr = cone; + for (k=1;k<=40;k++) { + cr *= -0.25*z2/(k*(k-vl)); + cjvl += cr; + if (abs(cr) < abs(cjvl)*eps) break; + } + vg = 1.0-vl; + gb = gamma(vg); + cb = pow(2.0/z1,vl)/gb; + if (l == 0) cju0 = cjvl*cb; + else cju1 = cjvl*cb; + } + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0); + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1); + } + else { + cec = log(0.5*z1)+el; + cs0 = czero; + w0 = 0.0; + cr0 = cone; + for (k=1;k<=30;k++) { + w0 += 1.0/k; + cr0 *= -0.25*z2/(double)(k*k); + cs0 += cr0*w0; + } + cyv0 = M_2_PI*(cec*cjv0-cs0); + cs1 = cone; + w1 = 0.0; + cr1 = cone; + for (k=1;k<=30;k++) { + w1 += 1.0/k; + cr1 *= -0.25*z2/(k*(k+1.0)); + cs1 += cr1*(2.0*w1+1.0/(k+1.0)); + } + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); + } + } + if (real(z) < 0.0) { + cfac0 = exp(pv0*cii); + cfac1 = exp(pv1*cii); + if (imag(z) < 0.0) { + cyv0 = cfac0*cyv0-2.0*cii*cos(pv0)*cjv0; + cyv1 = cfac1*cyv1-2.0*cii*cos(pv1)*cjv1; + cjv0 /= cfac0; + cjv1 /= cfac1; + } + else if (imag(z) > 0.0) { + cyv0 = cyv0/cfac0+2.0*cii*cos(pv0)*cjv0; + cyv1 = cyv1/cfac1+2.0*cii*cos(pv1)*cjv1; + cjv0 *= cfac0; + cjv1 *= cfac1; + } + } + cjv[0] = cjv0; + cjv[1] = cjv1; + if ((n >= 2) && (n <= (int)(0.25*a0))) { + cf0 = cjv0; + cf1 = cjv1; + for (k=2;k<= n;k++) { + cf = 2.0*(k+v0-1.0)*cf1/z-cf0; + cjv[k] = cf; + cf0 = cf1; + cf1 = cf; + } + } + else if (n >= 2) { + m = msta1(a0,200); + if (m < n) n = m; + else m = msta2(a0,n,15); + cf2 = czero; + cf1 = complex(1.0e-100,0.0); + for (k=m;k>=0;k--) { + cf = 2.0*(v0+k+1.0)*cf1/z-cf2; + if (k <= n) cjv[k] = cf; + cf2 = cf1; + cf1 = cf; + } + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; + else cs = cjv1/cf2; + for (k=0;k<=n;k++) { + cjv[k] *= cs; + } + } + cjvp[0] = v0*cjv[0]/z-cjv[1]; + for (k=1;k<=n;k++) { + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1]; + } + cyv[0] = cyv0; + cyv[1] = cyv1; + ya0 = abs(cyv0); + lb = 0; + cg0 = cyv0; + cg1 = cyv1; + for (k=2;k<=n;k++) { + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; + yak = abs(cyk); + ya1 = abs(cg0); + if ((yak < ya0) && (yak< ya1)) lb = k; + cyv[k] = cyk; + cg0 = cg1; + cg1 = cyk; + } + lb0 = 0; + if ((lb > 4) && (imag(z) != 0.0)) { + while(lb != lb0) { + ch2 = cone; + ch1 = czero; + lb0 = lb; + for (k=lb;k>=1;k--) { + ch0 = 2.0*(k+v0)*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp12 = ch0; + cp22 = ch2; + ch2 = czero; + ch1 = cone; + for (k=lb;k>=1;k--) { + ch0 = 2.0*(k+v0)*ch1/z-ch2; + ch2 = ch1; + ch1 = ch0; + } + cp11 = ch0; + cp21 = ch2; + if (lb == n) + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; + if (abs(cjv[0]) > abs(cjv[1])) { + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; + } + else { + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1]; + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1]; + } + cyl2 = cyv[lb+1]; + cyl1 = cyv[lb]; + for (k=lb-1;k>=0;k--) { + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2; + cyv[k] = cylk; + cyl2 = cyl1; + cyl1 = cylk; + } + cyl1 = cyv[lb]; + cyl2 = cyv[lb+1]; + for (k=lb+1;k z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp) +{ + //first, compute the bessel functions of fractional order + cbessjyva(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); + + //iterate through each and scale + for(int n = 0; n<=v; n++) + { + cjv[n] = cjv[n] * sqrt(PI/(z * 2.0)); + cyv[n] = cyv[n] * sqrt(PI/(z * 2.0)); + + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(PI / (z * 2.0)); + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(PI / (z * 2.0)); + } + + return 0; + +} + + + diff --git a/colormap.h b/colormap.h new file mode 100644 index 0000000..cc68f32 --- /dev/null +++ b/colormap.h @@ -0,0 +1,229 @@ +#ifndef RTS_COLORMAP_H +#define RTS_COLORMAP_H + +#include +#include +#include +#include "rts/cuda_handle_error.h" + + +#define BREWER_CTRL_PTS 11 + +#ifdef __CUDACC__ +texture cudaTexBrewer; +static cudaArray* gpuBrewer; +#endif + + + +namespace rts{ + namespace colormap{ + +enum colormapType {cmBrewer, cmGrayscale}; + +static void buffer2image(unsigned char* buffer, std::string filename, unsigned int x_size, unsigned int y_size) +{ + //create an image object + QImage image(x_size, y_size, QImage::Format_RGB32); + + int i; + unsigned char r, g, b; + unsigned int x, y; + for(y=0; y +__global__ static void applyBrewer(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + //compute the normalized value on [minVal maxVal] + float a = (gpuSource[i] - minVal) / (maxVal - minVal); + + //lookup the color + float shift = 1.0/BREWER_CTRL_PTS; + float4 color = tex1D(cudaTexBrewer, a+shift); + + gpuDest[i * 3 + 0] = 255 * color.x; + gpuDest[i * 3 + 1] = 255 * color.y; + gpuDest[i * 3 + 2] = 255 * color.z; +} + +template +__global__ static void applyGrayscale(T* gpuSource, unsigned char* gpuDest, unsigned int N, T minVal = 0, T maxVal = 1) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + //compute the normalized value on [minVal maxVal] + float a = (gpuSource[i] - minVal) / (maxVal - minVal); + + gpuDest[i * 3 + 0] = 255 * a; + gpuDest[i * 3 + 1] = 255 * a; + gpuDest[i * 3 + 2] = 255 * a; +} + +template +static void gpu2gpu(T* gpuSource, unsigned char* gpuDest, unsigned int nVals, T minVal = 0, T maxVal = 1, colormapType cm = cmGrayscale, int blockDim = 128) +{ + //This function converts a scalar field on the GPU to a color image on the GPU + int gridDim = (nVals + blockDim - 1)/blockDim; + if(cm == cmGrayscale) + applyGrayscale<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); + else if(cm == cmBrewer) + { + initBrewer(); + applyBrewer<<>>(gpuSource, gpuDest, nVals, minVal, maxVal); + destroyBrewer(); + } + +} + +template +static void gpu2cpu(T* gpuSource, unsigned char* cpuDest, unsigned int nVals, T minVal, T maxVal, colormapType cm = cmGrayscale) +{ + //this function converts a scalar field on the GPU to a color image on the CPU + + //first create the color image on the GPU + + //allocate GPU memory for the color image + unsigned char* gpuDest; + HANDLE_ERROR(cudaMalloc( (void**)&gpuDest, sizeof(unsigned char) * nVals * 3 )); + + //HANDLE_ERROR(cudaMemset(gpuSource, 0, sizeof(T) * nVals)); + + //create the image on the gpu + gpu2gpu(gpuSource, gpuDest, nVals, minVal, maxVal, cm); + + //HANDLE_ERROR(cudaMemset(gpuDest, 0, sizeof(unsigned char) * nVals * 3)); + + //copy the image from the GPU to the CPU + HANDLE_ERROR(cudaMemcpy(cpuDest, gpuDest, sizeof(unsigned char) * nVals * 3, cudaMemcpyDeviceToHost)); + + HANDLE_ERROR(cudaFree( gpuDest )); + +} + +template +static void gpu2image(T* gpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) +{ + //allocate a color buffer + unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); + + //do the mapping + gpu2cpu(gpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); + + //copy the buffer to an image + buffer2image(cpuBuffer, fileDest, x_size, y_size); + + free(cpuBuffer); +} + +#endif + +template +static void cpu2cpu(T* cpuSource, unsigned char* cpuDest, unsigned int nVals, T valMin, T valMax, colormapType cm = cmGrayscale) +{ + int i; + float a; + float range = valMax - valMin; + for(i = 0; i +static void cpu2image(T* cpuSource, std::string fileDest, unsigned int x_size, unsigned int y_size, T valMin, T valMax, colormapType cm = cmGrayscale) +{ + //allocate a color buffer + unsigned char* cpuBuffer = (unsigned char*) malloc(sizeof(unsigned char) * 3 * x_size * y_size); + + //do the mapping + cpu2cpu(cpuSource, cpuBuffer, x_size * y_size, valMin, valMax, cm); + + //copy the buffer to an image + buffer2image(cpuBuffer, fileDest, x_size, y_size); + + free(cpuBuffer); + +} + +}} //end namespace colormap and rts + +#endif + diff --git a/dataTypes.h b/dataTypes.h new file mode 100644 index 0000000..664366b --- /dev/null +++ b/dataTypes.h @@ -0,0 +1,39 @@ +#ifndef DATATYPES_H +#define DATATYPES_H + +#include + +#define PRECISION_SINGLE + +#ifdef PRECISION_SINGLE +typedef float ptype; +#elif defined PRECISION_DOUBLE +typedef double ptype; +#endif + +#define BLOCK 256 +#define SQRT_BLOCK 16 + +#define PI 3.14159 + +//a very small number +#define EPSILON 0.00001 + +//CUDA hybrid code - complex class should run on both the CPU and GPU + + +typedef ptype fieldPoint; + +//hybrid GPU/CPU complex data typ +#include "rts/rtsComplex.h" +#include "rts/rtsVector.h" +#include "rts/rtsPoint.h" +#include "rts/rtsQuad.h" + +typedef rts::rtsComplex bsComplex; +typedef rts::rtsVector bsVector; +typedef rts::rtsPoint bsPoint; +typedef rts::rtsQuad bsRect; + + +#endif diff --git a/defaults.h b/defaults.h new file mode 100644 index 0000000..d2bc201 --- /dev/null +++ b/defaults.h @@ -0,0 +1,86 @@ +#ifndef BIMSIM_DEFAULTS +#define BIMSIM_DEFAULTS + +//default sphere parameters +#define DEFAULT_SPHERE_X 0 +#define DEFAULT_SPHERE_Y 0 +#define DEFAULT_SPHERE_Z 0 +#define DEFAULT_SPHERE_A 1 + +//default near field parameters +#define DEFAULT_LAMBDA 1 +#define DEFAULT_AMPLITUDE 1 +#define DEFAULT_N 1.4 +#define DEFAULT_K 0.5 +#define DEFAULT_FOCUS_X 0 +#define DEFAULT_FOCUS_Y 0 +#define DEFAULT_FOCUS_Z 0 +#define DEFAULT_INCIDENT_ORDER 100 +#define DEFAULT_STABILITY_PARM 1.4 + +//optics +#define DEFAULT_CONDENSER_MIN 0.0 +#define DEFAULT_CONDENSER_MAX 1 + +#define DEFAULT_OBJECTIVE_MIN 0.0 +#define DEFAULT_OBJECTIVE_MAX 1 + +//incident light direction +#define DEFAULT_K_THETA 0 +#define DEFAULT_K_PHI 0 + +//default flags +#define DEFAULT_PLANEWAVE false +#define DEFAULT_VECTORSIM false +//#define DEFAULT_OUTPUT_POINT fileoutStruct::imageObjective + + +#define DEFAULT_SLICE_MIN_X -5 +#define DEFAULT_SLICE_MIN_Y 0 +#define DEFAULT_SLICE_MIN_Z -5 + +#define DEFAULT_SLICE_MAX_X 5 +#define DEFAULT_SLICE_MAX_Y 0 +#define DEFAULT_SLICE_MAX_Z 5 + +#define DEFAULT_SLICE_NORM_X 0 +#define DEFAULT_SLICE_NORM_Y 1 +#define DEFAULT_SLICE_NORM_Z 0 + + +/* +#define DEFAULT_SLICE_MIN_X -6 +#define DEFAULT_SLICE_MIN_Y -6 +#define DEFAULT_SLICE_MIN_Z 1 + +#define DEFAULT_SLICE_MAX_X 6 +#define DEFAULT_SLICE_MAX_Y 6 +#define DEFAULT_SLICE_MAX_Z 1 + +#define DEFAULT_SLICE_NORM_X 0 +#define DEFAULT_SLICE_NORM_Y 0 +#define DEFAULT_SLICE_NORM_Z 1 +*/ + + +#define DEFAULT_FIELD_ORDER 200 + +#define DEFAULT_SAMPLES 200 + +#define DEFAULT_SLICE_RES 256 + +#define DEFAULT_PADDING 1 +#define DEFAULT_SUPERSAMPLE 1 + +#define DEFAULT_INTENSITY_FILE "out_di.bmp" +#define DEFAULT_TRANSMITTANCE_FILE "" +#define DEFAULT_ABSORBANCE_FILE "" +#define DEFAULT_NEAR_FILE "out_n.bmp" +#define DEFAULT_FAR_FILE "out_f.bmp" +#define DEFAULT_EXTENDED_SOURCE "" +#define DEFAULT_FIELD_TYPE "magnitude" +#define DEFAULT_FORMAT fileoutStruct::formatImage +#define DEFAULT_COLORMAP "brewer" + + +#endif diff --git a/fieldslice.cpp b/fieldslice.cpp new file mode 100644 index 0000000..5df3b55 --- /dev/null +++ b/fieldslice.cpp @@ -0,0 +1,89 @@ +#include "fieldslice.h" +#include "dataTypes.h" + +#include "cufft.h" + +#include +#include +using namespace std; + +fieldslice::fieldslice(unsigned int x_size, unsigned int y_size) +{ + //save the slice resolution + R[0] = x_size; + R[1] = x_size; + + scalarField = true; + + //init_gpu(); + + +} + +void fieldslice::toAngularSpectrum() +{ + cufftHandle plan; + + //create a CUFFT plan handle + if(cufftPlan2d(&plan, R[0], R[1], CUFFT_C2C) != CUFFT_SUCCESS) + { + cout<<"Error creating CUFFT plan for computing the angular spectrum."<= N) return; + + ptype xm = x[i].abs(); + + + if(y != NULL && z != NULL) + { + ptype ym = y[i].abs(); + ptype zm = z[i].abs(); + I[i] = xm*xm + ym*ym + zm*zm; + } + else + { + I[i] = xm*xm; + } +} + +__global__ void resample_intensity(bsComplex* x, bsComplex* y, bsComplex* z, ptype* D, int uR, int vR, int ss) +{ + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index into the detector + int i = iv*uR + iu; + + //compute the index into the field + int fi; + + //initialize the intensity for the pixel to zero + ptype I = 0; + ptype xm = 0; + ptype ym = 0; + ptype zm = 0; + + int ix, iy; + for(ix = 0; ix= N) return; + + V[i] = field_component[i].real(); +} + +__global__ void field_imaginary(bsComplex* field_component, ptype* V, unsigned int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + V[i] = field_component[i].imag(); +} + +__global__ void field_sqrt(ptype* input, ptype* output, unsigned int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + output[i] = sqrt(input[i]); + +} + + +__global__ void field_scale(bsComplex* x, bsComplex* y, bsComplex* z, unsigned int N, ptype v) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + if(x != NULL) + x[i] *= v; + if(y != NULL) + y[i] *= v; + if(z != NULL) + z[i] *= v; +} + + +scalarslice fieldslice::Mag() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); + field_sqrt<<>>(result->S, result->S, N); + + return *result; +} + +scalarslice fieldslice::Real() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_real<<>>(x_hat, result->S, N); + + return *result; +} + +scalarslice fieldslice::Imag() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_imaginary<<>>(x_hat, result->S, N); + + return *result; +} + +void fieldslice::IntegrateAndResample(scalarslice* detector, unsigned int supersample) +{ + //compute the intensity and resample at the detector resolution + unsigned int D[2]; + D[0] = detector->R[0]; + D[1] = detector->R[1]; + + //create one thread for each detector pixel + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((D[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (D[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + resample_intensity<<>>(x_hat, y_hat, z_hat, detector->S, D[0], D[1], supersample); +} + +scalarslice fieldslice::Intensity() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* result = new scalarslice(R[0], R[1]); + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + field_intensity<<>>(x_hat, y_hat, z_hat, result->S, N); + + return *result; +} + +void fieldslice::ScaleField(ptype v) +{ + //This function scales the field by some constant value v + //This is mostly used for the inverse FFT, which has to divide the field by R^2 + + //compute the total number of values in the slice + unsigned int N = R[0] * R[1]; + //cout<<"Size of mag field: "<>>(x_hat, y_hat, z_hat, N, v); + +} + +void fieldslice::init_gpu() +{ + //allocate space on the GPU for the field slice + HANDLE_ERROR(cudaMalloc((void**)&x_hat, R[0] * R[1] * sizeof(bsComplex))); + //HANDLE_ERROR(cudaMemset(x_hat, 0, R[0] * R[1] * sizeof(bsComplex))); + + //if the field is scalar, y_hat and z_hat are unused + if(scalarField) + { + y_hat = NULL; + z_hat = NULL; + + } + else + { + HANDLE_ERROR(cudaMalloc((void**)&y_hat, R[0] * R[1] * sizeof(bsComplex))); + //HANDLE_ERROR(cudaMemset(y_hat, 0, R[0] * R[1] * sizeof(bsComplex))); + + HANDLE_ERROR(cudaMalloc((void**)&z_hat, R[0] * R[1] * sizeof(bsComplex))); + //HANDLE_ERROR(cudaMemset(z_hat, 0, R[0] * R[1] * sizeof(bsComplex))); + } + + clear_gpu(); +} + +void fieldslice::kill_gpu() +{ + if(x_hat != NULL) + HANDLE_ERROR(cudaFree(x_hat)); + if(y_hat != NULL) + HANDLE_ERROR(cudaFree(y_hat)); + if(z_hat != NULL) + HANDLE_ERROR(cudaFree(z_hat)); + +} + +void fieldslice::clear_gpu() +{ + int memsize = R[0] * R[1] * sizeof(bsComplex); + if(x_hat != NULL) + HANDLE_ERROR(cudaMemset(x_hat, 0, memsize)); + if(y_hat != NULL) + HANDLE_ERROR(cudaMemset(y_hat, 0, memsize)); + if(z_hat != NULL) + HANDLE_ERROR(cudaMemset(z_hat, 0, memsize)); + +} + +__global__ void copy_crop(bsComplex* source, bsComplex* dest, int u, int v, int su, int sv, int uR, int vR) +{ + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= su || iv >= sv) return; + + //compute the destination index + int i = iv*su + iu; + + //compute the source index + int sourceV = v + iv; + int sourceU = u + iu; + int is = sourceV * uR + sourceU; + + dest[i] = source[is]; + +} + +fieldslice fieldslice::crop(int u, int v, int su, int sv) +{ + //create a new field slice with the appropriate settings + fieldslice result(su, sv); + result.scalarField = scalarField; + + //allocate space for the new field + result.init_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((su + SQRT_BLOCK -1)/SQRT_BLOCK, (sv + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //call a kernel to copy the cropped to the new field slice + if(x_hat != NULL) + copy_crop<<>>(x_hat, result.x_hat, u, v, su, sv, R[0], R[1]); + if(y_hat != NULL) + copy_crop<<>>(y_hat, result.y_hat, u, v, su, sv, R[0], R[1]); + if(z_hat != NULL) + copy_crop<<>>(z_hat, result.z_hat, u, v, su, sv, R[0], R[1]); + + return result; +} diff --git a/fieldslice.h b/fieldslice.h new file mode 100644 index 0000000..a8c449c --- /dev/null +++ b/fieldslice.h @@ -0,0 +1,58 @@ +#ifndef FIELDSLICE_H +#define FIELDSLICE_H + +#include "dataTypes.h" + +#include +#include +#include "rts/rtsQuad.h" + +#include "scalarslice.h" + + +struct fieldslice +{ + //images representing the slice for each world-space polarization direction + bsComplex* x_hat; + bsComplex* y_hat; + bsComplex* z_hat; + + + + //slice resolution + unsigned int R[2]; + + //if the slice is a scalar + bool scalarField; + + fieldslice(unsigned int x_res, unsigned int y_res); + + fieldslice(); + + ~fieldslice(); + + //void setPos(bsPoint pMin, bsPoint pMax, bsVector N); + + scalarslice Mag(); + scalarslice Real(); + scalarslice Imag(); + scalarslice Intensity(); + void IntegrateAndResample(scalarslice* detector, unsigned int supersample); + + void ScaleField(ptype v); + + //convert the field slice to the angular spectrum + void toAngularSpectrum(); + void fromAngularSpectrum(); + + //crop a region from the field + fieldslice crop(int u, int v, int su, int sv); + + void init_gpu(); + void kill_gpu(); + void clear_gpu(); + + std::string toStr(); +}; + +#endif diff --git a/fileout.cu b/fileout.cu new file mode 100644 index 0000000..9dc7bcd --- /dev/null +++ b/fileout.cu @@ -0,0 +1,202 @@ +#include "fileout.h" +//#include "scalarfield.h" + + +/*void fileoutStruct::saveMag(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + int Rx = U->R[0]; + int Ry = U->R[1]; + + //allocate space for a scalar field on the GPU + ptype* gpuScalar; + int memsize = sizeof(ptype) * Rx * Ry; + HANDLE_ERROR(cudaMalloc((void**) &gpuScalar, memsize)); + HANDLE_ERROR(cudaMemset(gpuScalar, 0, memsize)); + U->Mag(gpuScalar); + + + rts::colormap::gpu2image(gpuScalar, filename, Rx, Ry, 0, colorMax, cmap); + + HANDLE_ERROR(cudaFree(gpuScalar)); +} + +void fileoutStruct::saveReal_scalar(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + //returns the real component + scalarslice sf = U->Real(); + sf.toImage(filename, false, cmap); + +} + +void fileoutStruct::saveImag_scalar(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + //returns the imaginary component of a field (assumed scalar) + scalarslice sf = U->Imag(); + sf.toImage(filename, false, cmap); +} + +void fileoutStruct::saveIntensity(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + //get the intensity of the field + scalarslice sf = U->Intensity(); + sf.toImage(filename, true, cmap); +} + +void fileoutStruct::saveAngularSpectrum(fieldslice* U, std::string filename, rts::colormap::colormapType cmap) +{ + ptype* gpuScalar; + int memsize = sizeof(ptype) * U->R[0] * U->R[1]; + HANDLE_ERROR(cudaMalloc((void**) &gpuScalar, memsize)); + HANDLE_ERROR(cudaMemset(gpuScalar, 0, memsize)); + + //convert the field slice to its angular spectrum + U->toAngularSpectrum(); + + //convert the angular spectrum to a scalar field + U->Mag(gpuScalar); + + //save the color image + rts::colormap::gpu2image(gpuScalar, filename, U->R[0], U->R[1], 0, colorMax, cmap); + + HANDLE_ERROR(cudaFree(gpuScalar)); + +}*/ + +void fileoutStruct::saveNearField(nearfieldStruct* nf) +{ + if(nearFile == "") return; + + if(field == fieldReal) + { + scalarslice S = nf->U.Real(); + S.toImage(nearFile, false, colormap); + } + if(field == fieldImag) + { + scalarslice S = nf->U.Imag(); + S.toImage(nearFile, false, colormap); + } + if(field == fieldMag) + { + scalarslice S = nf->U.Mag(); + S.toImage(nearFile, true, colormap); + } +} + +void fileoutStruct::saveFarField(microscopeStruct* scope) +{ + if(farFile == "") return; + + if(field == fieldReal) + { + scalarslice S = scope->Ud.Real(); + S.toImage(farFile, false, colormap); + } + if(field == fieldImag) + { + scalarslice S = scope->Ud.Imag(); + S.toImage(farFile, false, colormap); + } + if(field == fieldMag) + { + scalarslice S = scope->Ud.Mag(); + S.toImage(farFile, true, colormap); + } + +} + +void fileoutStruct::saveDetector(microscopeStruct* scope) +{ + //intensity + if(intFile != "") + { + scalarslice I = scope->getIntensity(); + + if(is_binary(intFile)) + I.toEnvi(intFile, scope->nf.lambda); + else + I.toImage(intFile); + } + //absorbance + if(absFile != "") + { + scalarslice I = scope->getAbsorbance(); + + if(is_binary(absFile)) + I.toEnvi(absFile, scope->nf.lambda); + else + I.toImage(absFile); + } + //transmittance + if(transFile != "") + { + scalarslice I = scope->getTransmittance(); + + if(is_binary(transFile)) + I.toEnvi(transFile, scope->nf.lambda); + else + I.toImage(transFile); + } + +} + +bool fileoutStruct::is_binary(std::string filename) +{ + //this function guesses if a file name is binary or a standard image + // do this by just testing extensions + + //get the extension + size_t i = filename.find_last_of('.'); + + //if there is no extension, return true + if( i == std::string::npos ) + return true; + + //otherwise grab the extension + std::string ext = filename.substr(i+1); + if(ext == "bmp" || + ext == "jpg" || + ext == "tif" || + ext == "gif" || + ext == "png") + return false; + else + return true; +} + + + +void fileoutStruct::Save(microscopeStruct* scope) +{ + //save images of the fields in the microscope + + //if the user specifies an extended source + if(scope->focalPoints.size() > 1) + { + //simulate the extended source and output the detector image + scope->SimulateExtendedSource(); + + } + else + { + //run the near-field simulation + scope->SimulateScattering(); + + //output the near field image + saveNearField(&scope->nf); + + //run the far-field simulation + scope->SimulateImaging(); + + saveFarField(scope); + + } + + //save the detector images + saveDetector(scope); + + +} + + + diff --git a/fileout.h b/fileout.h new file mode 100644 index 0000000..e3f8887 --- /dev/null +++ b/fileout.h @@ -0,0 +1,48 @@ +#ifndef FILE_OUTPUT_H +#define FILE_OUTPUT_H + +#include +//#include "defaults.h" +#include "dataTypes.h" + +#include "colormap.h" +#include "fieldslice.h" +#include "nearfield.h" +#include "microscope.h" +#include "rts/cuda_handle_error.h" + +struct fileoutStruct{ + + //output file names + std::string nearFile; //near field filename + std::string farFile; //far field filename + std::string intFile; //detector intensity filename + std::string absFile; //detector absorbance filename + std::string transFile; //detector transmission filename + + //output type + enum field_type {fieldMag, fieldIntensity, fieldAbsorbance, fieldPolar, fieldImag, fieldReal, fieldAngularSpectrum}; + enum image_source {imageNearField, imageObjective, imageDetector, imageExtendedSource}; + + field_type field; + + //image_source source; + + //color map info + rts::colormap::colormapType colormap; + ptype colorMax; + + + void Save(microscopeStruct* scope); + void Simulate(microscopeStruct* scope); + + private: + bool is_binary(std::string filename); + void saveNearField(nearfieldStruct* nf); + void saveFarField(microscopeStruct* scope); + void saveDetector(microscopeStruct* scope); + + +}; + +#endif diff --git a/gamma.cpp b/gamma.cpp new file mode 100644 index 0000000..150eb9c --- /dev/null +++ b/gamma.cpp @@ -0,0 +1,82 @@ +// gamma.cpp -- computation of gamma function. +// Algorithms and coefficient values from "Computation of Special +// Functions", Zhang and Jin, John Wiley and Sons, 1996. +// +// (C) 2003, C. Bond. All rights reserved. +// +// Returns gamma function of argument 'x'. +// +// NOTE: Returns 1e308 if argument is a negative integer or 0, +// or if argument exceeds 171. +// +#define _USE_MATH_DEFINES +#include +double gamma(double x) +{ + int i,k,m; + double ga,gr,r,z; + + static double g[] = { + 1.0, + 0.5772156649015329, + -0.6558780715202538, + -0.420026350340952e-1, + 0.1665386113822915, + -0.421977345555443e-1, + -0.9621971527877e-2, + 0.7218943246663e-2, + -0.11651675918591e-2, + -0.2152416741149e-3, + 0.1280502823882e-3, + -0.201348547807e-4, + -0.12504934821e-5, + 0.1133027232e-5, + -0.2056338417e-6, + 0.6116095e-8, + 0.50020075e-8, + -0.11812746e-8, + 0.1043427e-9, + 0.77823e-11, + -0.36968e-11, + 0.51e-12, + -0.206e-13, + -0.54e-14, + 0.14e-14}; + + if (x > 171.0) return 1e308; // This value is an overflow flag. + if (x == (int)x) { + if (x > 0.0) { + ga = 1.0; // use factorial + for (i=2;i 1.0) { + z = fabs(x); + m = (int)z; + r = 1.0; + for (k=1;k<=m;k++) { + r *= (z-k); + } + z -= m; + } + else + z = x; + gr = g[24]; + for (k=23;k>=0;k--) { + gr = gr*z+g[k]; + } + ga = 1.0/(gr*z); + if (fabs(x) > 1.0) { + ga *= r; + if (x < 0.0) { + ga = -M_PI/(x*ga*sin(M_PI*x)); + } + } + } + return ga; +} diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..dfdba62 --- /dev/null +++ b/main.cpp @@ -0,0 +1,65 @@ +#include "sphere.h" +#include "rts/material.h" +#include +//#include "rts/complex.h" + +#include "nearfield.h" +//nearfieldStruct* NF; + +#include "microscope.h" +microscopeStruct* SCOPE; + +#include "fieldslice.h" + +#include "fileout.h" +#include "options.h" +#include "montecarlo.h" +#include "rts/rtsPoint.h" +#include "rts/sbessel.h" +#include "rts/rtsMatrix.h" +#include "rts/rtsQuaternion.h" + +#include "rtsEnvi.h" + +#include "warnings.h" + +fileoutStruct gFileOut; +using namespace std; + +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); + +int main(int argc, char *argv[]) +{ + //test Envi loading and saving + //EnviFile envi("testenvi", "w"); + + //float* data = (float*)malloc(sizeof(float) * 100 * 100); + //envi.addBand(data, 100, 100, 100); + + //envi.close(); + + //return 0; + + SCOPE = new microscopeStruct(); + + cout<nf.Uf.R[0]<init(); + + OutputOptions(); + + gFileOut.Save(SCOPE); + + //NF->destroy(); + SCOPE->destroy(); + + + + +} diff --git a/microscope.cu b/microscope.cu new file mode 100644 index 0000000..9154a5e --- /dev/null +++ b/microscope.cu @@ -0,0 +1,303 @@ +#include "microscope.h" + +#include "rts/cuda_handle_error.h" +#include "rts/rtsProgressBar.h" +#include "rts/cuda_timer.h" +#include "dataTypes.h" +#include "colormap.h" + +#include + +__global__ void bandpass(bsComplex* U, int uR, int vR, ptype du, ptype dv, ptype NAin, ptype NAout, ptype lambda) +{ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + ptype u, v; + + if(iu <= uR / 2) + u = (ptype)iu * du; + else + u = -(ptype)(uR - 1 - iu) * du; + + if(iv <= vR / 2) + v = (ptype)iv * dv; + else + v = -(ptype)(vR - 1 - iv) * dv; + + ptype fmag = sqrt(u*u + v*v); + + if(fmag < NAin / lambda || fmag > NAout / lambda) + U[i] = 0; + //U[i] = U[i]; +} + +microscopeStruct::microscopeStruct() +{ + scalarSim = true; + D = NULL; + Di = NULL; +} + +void microscopeStruct::init() +{ + //Ud.scalarField = scalarSim; + //Ufd.scalarField = scalarSim; + + //Ud.init_gpu(); + //Ufd.init_gpu(); + + //initialize the near field + nf.init(); + + //allocate space for the detector + D = new scalarslice(Ud.R[0] / ss, Ud.R[1] / ss); + Di = new scalarslice(Ud.R[0] / ss, Ud.R[1] / ss); + + //clear the detector + clearDetector(); + +} + +void microscopeStruct::destroy() +{ + delete D; + D = NULL; + + delete Di; + Di = NULL; + + Ud.kill_gpu(); + Ufd.kill_gpu(); + + //destroy the near field + nf.destroy(); + +} + +void microscopeStruct::applyBandpass() +{ + //This function applies the objective bandpass to the near field + //The near field structure stores the results, in order to save memory + + //first convert the near field to an angular spectrum (FFT) + nf.U.toAngularSpectrum(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((nf.U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (nf.U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //compute the step size in the frequency domain + ptype du = 1.0 / (nf.pos.X.len()); + ptype dv = 1.0 / (nf.pos.Y.len()); + + //apply the objective band-pass filter + bandpass<<>>(nf.U.x_hat, nf.U.R[0], nf.U.R[1], du, dv, objective[0], objective[1], nf.lambda); + + //convert the near field image back to the spatial domain + // (this is the field at the detector) + nf.U.fromAngularSpectrum(); +} + +void microscopeStruct::getFarField() +{ + //Compute the Far Field image of the focal plane + + //clear the memory from previous detector fields + Ud.kill_gpu(); + Ufd.kill_gpu(); + + //first crop the filtered near-field image of the source and scattered fields + Ud = nf.U.crop(padding * Ud.R[0], padding * Ud.R[1], Ud.R[0], Ud.R[1]); + Ufd = nf.Uf.crop(padding * Ufd.R[0], padding * Ufd.R[1], Ufd.R[0], Ufd.R[1]); + +} + +void microscopeStruct::integrateDetector() +{ + Ud.IntegrateAndResample(D, ss); + Ufd.IntegrateAndResample(Di, ss); +} + +void microscopeStruct::clearDetector() +{ + //zero-out the detector + D->clear(); + Di->clear(); +} + +//flag for a vector simulation +void microscopeStruct::setPos(bsPoint pMin, bsPoint pMax, bsVector normal) +{ + pos = rts::rtsQuad(pMin, pMax, normal); +} + +void microscopeStruct::setRes(int x_res, int y_res, int pad, int supersampling) +{ + padding = pad; + ss = supersampling; + + Ufd.R[0] = Ud.R[0] = x_res * ss; + Ufd.R[1] = Ud.R[1] = y_res * ss; +} + +void microscopeStruct::setNearfield() +{ + //sets the values for the near field in order to create the specified detector image + + //compute the size of the near-field slice necessary to create the detector image + nf.pos = pos * (padding * 2 + 1); + + //compute the resolution of the near-field slice necessary to create the detector image + nf.setRes(Ud.R[0] * (padding * 2 + 1), Ud.R[1] * (padding * 2 + 1)); + +} + +__global__ void calc_absorbance(ptype* A, ptype* D, ptype* Di, int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + A[i] = -log10(D[i] / Di[i]); + +} + +scalarslice microscopeStruct::getAbsorbance() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* A = new scalarslice(D->R[0], D->R[1]); + + //compute the total number of values in the slice + unsigned int N = D->R[0] * D->R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + calc_absorbance<<>>(A->S, D->S, Di->S, N); + + return *A; +} + +__global__ void calc_transmittance(ptype* A, ptype* D, ptype* Di, int N) +{ + //compute the index for this thread + int i = blockIdx.x * blockDim.x + threadIdx.x; + if(i >= N) return; + + A[i] = D[i] / Di[i]; + +} + +scalarslice microscopeStruct::getTransmittance() +{ + //compute the magnitude of the field at each rtsPoint in the slice + + //create a scalar slice at the same resolution as the field + scalarslice* T = new scalarslice(D->R[0], D->R[1]); + + //compute the total number of values in the slice + unsigned int N = D->R[0] * D->R[1]; + int gridDim = (N+BLOCK-1)/BLOCK; + + calc_transmittance<<>>(T->S, D->S, Di->S, N); + + return *T; +} + +scalarslice microscopeStruct::getIntensity() +{ + //create a scalar slice at the same resolution as the field + scalarslice* I = new scalarslice(D->R[0], D->R[1]); + + HANDLE_ERROR(cudaMemcpy(I->S, D->S, sizeof(ptype) * D->R[0] * D->R[1], cudaMemcpyDeviceToDevice)); + + return *I; + +} + +void microscopeStruct::SimulateScattering() +{ + nf.Simulate(); +} + +void microscopeStruct::SimulateImaging() +{ + applyBandpass(); + getFarField(); + integrateDetector(); +} + +void microscopeStruct::Simulate() +{ + SimulateScattering(); + SimulateImaging(); +} + +void microscopeStruct::SimulateExtendedSource() +{ + + clearDetector(); + + //for each source in the source list + int npts = focalPoints.size(); + float t=0; + for(int i = 0; i +using namespace std; + +struct sourcePoint +{ + bsPoint f; + ptype A; +}; + +struct microscopeStruct +{ + int padding; + + int ss; + + //field slice at the detector + fieldslice Ufd; + fieldslice Ud; + bool scalarSim; + + //detector image + scalarslice* D; //sample + scalarslice* Di; //incident field image + + ptype objective[2]; + + //image position and orientation in world space + rts::rtsQuad pos; + + vector focalPoints; + + microscopeStruct(); + + nearfieldStruct nf; + + void init(); + void destroy(); + + //functions for dealing with extended sources + void SimulateExtendedSource(); + void LoadExtendedSource(std::string filename); + + void SimulateScattering(); + void SimulateImaging(); + void Simulate(); + void setNearfield(); + void setRes(int x_res, int y_res, int pad, int supersampling); + void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); + + void applyBandpass(); + void getFarField(); + void integrateDetector(); + void clearDetector(); + + //compute detector measurements + scalarslice getAbsorbance(); + scalarslice getTransmittance(); + scalarslice getIntensity(); + + + +}; + +#endif diff --git a/montecarlo.cpp b/montecarlo.cpp new file mode 100644 index 0000000..7b9c09f --- /dev/null +++ b/montecarlo.cpp @@ -0,0 +1,70 @@ +#include "montecarlo.h" +#include "rts/rtsQuaternion.h" +#include "rts/rtsMatrix.h" +#include +#include +using namespace std; + +void mcSampleNA(bsVector* samples, int N, bsVector k, ptype NAin, ptype NAout) +{ + /*Create Monte-Carlo samples of a cassegrain objective by performing uniform sampling + of a sphere and projecting these samples onto an inscribed sphere. + + samples = rtsPointer to sample vectors specified as normalized cartesian coordinates + N = number of samples + kSph = incident light direction in spherical coordinates + NAin = internal obscuration NA + NAout = outer cassegrain NA + */ + + //get the axis of rotation for transforming (0, 0, 1) to k + //k = -k; + ptype cos_angle = k.dot(bsVector(0, 0, 1)); + rts::rtsMatrix rotation; + if(cos_angle != 1.0) + { + bsVector axis = bsVector(0, 0, 1).cross(k).norm(); + + ptype angle = acos(cos_angle); + rts::rtsQuaternion quat; + quat.CreateRotation(angle, axis); + rotation = quat.toMatrix(); + } + + //find the phi values associated with the cassegrain ring + ptype inPhi = asin(NAin); + ptype outPhi = asin(NAout); + + //cout<<"inPhi: "<(pMin, pMax, normal); +} + +void nearfieldStruct::setRes(int x_res, int y_res) +{ + U.R[0] = Uf.R[0] = x_res; + U.R[1] = Uf.R[1] = y_res; +} + +std::string nearfieldStruct::toStr() +{ + std::stringstream ss; + + ss<<"------Field Parameters-------"< n = mVector[imat](lambda); + //std::cout<<"Sphere refractive index: "< + +#define EPSILON_FLOAT 0.000001 + +//This structure stores values relevant to creating the near field +struct nearfieldStruct +{ + //incident wavelength + ptype lambda; + + //condenser numerical aperature (internal and external) + ptype condenser[2]; + + //amplitude of the incident field + ptype A; + + //position of the focus in 3D space + bsVector k; //cartesian coordinates, normalized + bsPoint focus; + + //slice position and orientation in world space + rts::rtsQuad pos; + + //slices for the focused field + fieldslice Uf; + // and total field: Uf + sum(Us) + fieldslice U; + + //incident field order + int m; + + //flag for a vector simulation + bool scalarSim; + + //flag for a plane wave + bool planeWave; + + + + //---------Scatterers------------ + + //angular resolution of scatterers + // number of divisions in [0 pi] + unsigned int angRes; + + //list of materials + std::vector< rts::material > mVector; + + //list of scatterers + std::vector sVector; + + nearfieldStruct(); + + void init(); + void destroy(); + + void Simulate(); + + void calcSpheres(); + + //plane waves for Monte-Carlo sampling + std::vector inWaves; + int nWaves; + void calcWaves(); + + std::string toStr(); + + void setRes(int x_res, int y_res); + + void setPos(bsPoint pMin, bsPoint pMax, bsVector normal); + + //this function re-computes the focused field + void scalarUf(); + + //compute the field scattered by all of the materials + void scalarUs(); + + //add the incident field to the sum of scattered fields + void sumUf(); + + + + + +}; + + + +#endif diff --git a/nfScalarUf.cu b/nfScalarUf.cu new file mode 100644 index 0000000..ec04ed2 --- /dev/null +++ b/nfScalarUf.cu @@ -0,0 +1,199 @@ +#include "nearfield.h" +#include "rts/sbessel.h" +#include "rts/legendre.h" +#include +#include "rts/cuda_handle_error.h" +#include "rts/cuda_timer.h" + + +__global__ void gpuScalarUfp(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR) +{ + /*Compute the scalar focused field using Debye focusing + k = direction of focused light, where |k| = 2*pi/lambda + P = rect struct describing the field slice + rX, rY = resolution of the field slice + cNAin = inner NA of the condenser + cNAout = outer NA of the condenser + */ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / uR; + ptype v = (ptype)iv / vR; + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - f; + //ptype d = r.len(); + + ptype k_dot_r = kmag * k.dot(r); + bsComplex d(0, k_dot_r); + + Uf[i] = exp(d) * A; + +} + +__global__ void gpuScalarUf(bsComplex* Uf, bsVector k, ptype kmag, bsPoint f, ptype A, bsRect ABCD, int uR, int vR, ptype cosAlpha, ptype cosBeta, int nl, ptype j_conv = 1.4) +{ + /*Compute the scalar focused field using Debye focusing + k = direction of focused light, where |k| = 2*pi/lambda + P = rect struct describing the field slice + rX, rY = resolution of the field slice + cNAin = inner NA of the condenser + cNAout = outer NA of the condenser + */ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / (uR); + ptype v = (ptype)iv / (vR); + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - f; + ptype d = r.len(); + if(d < EPSILON_FLOAT) + { + Uf[i] = A * 2 * PI * (cosAlpha - cosBeta); + return; + } + + //get info for the light direction and frequency + //k = k.norm(); + r = r.norm(); + + //compute the imaginary factor i^l + bsComplex im = bsComplex(0, 1); + bsComplex il = bsComplex(1, 0); + + //Bessel and Legendre functions are computed dynamically to save memory + //initialize the Bessel and Legendre functions + ptype j[2]; + ptype kd = kmag * d; + rts::init_sbesselj(kd, j); + + ptype P[2]; + //get the angle between k and r (light direction and position vector) + ptype cosTheta; + cosTheta = k.dot(r); + + //deal with the degenerate case where r == 0 + //if(isnan(cosTheta)) + // cosTheta = 0; + rts::init_legendre(cosTheta, P[0], P[1]); + + //initialize legendre functions for the cassegrain angles + ptype Palpha[3]; + //ptype cosAlpha = cos(asin(cNAin)); + rts::init_legendre(cosAlpha, Palpha[0], Palpha[1]); + Palpha[2] = 1; + + ptype Pbeta[3]; + //ptype cosBeta = cos(asin(cNAout)); + rts::init_legendre(cosBeta, Pbeta[0], Pbeta[1]); + Pbeta[2] = 1; + + //for each order l + bsComplex sumUf(0.0, 0.0); + ptype jl = 0.0; + ptype Pl; + for(int l = 0; l<=nl; l++) + { + + if(l==0) + { + + jl = j[0]; + Pl = P[0]; + } + else if(l==1) + { + jl = j[1]; + Pl = P[1]; + + //adjust the cassegrain Legendre function + Palpha[2] = Palpha[0]; + rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); + Pbeta[2] = Pbeta[0]; + rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); + } + else + { + rts::shift_sbesselj(l, kd, j);//, j_conv); + rts::shift_legendre(l, cosTheta, P[0], P[1]); + + jl = j[1]; + Pl = P[1]; + + //adjust the cassegrain outer Legendre function + Palpha[2] = Palpha[0]; + rts::shift_legendre(l+1, cosAlpha, Palpha[0], Palpha[1]); + Pbeta[2] = Pbeta[0]; + rts::shift_legendre(l+1, cosBeta, Pbeta[0], Pbeta[1]); + } + + sumUf += il * jl * Pl * (Palpha[1] - Palpha[2] - Pbeta[1] + Pbeta[2]); + + il *= im; + } + + Uf[i] = sumUf * 2 * PI * A; + +} + +void nearfieldStruct::scalarUf() +{ + //Compute the incident field via a scalar simulation + //This method uses Debye focusing to approximate the field analytically + + //time the calculation of the focused field + //gpuStartTimer(); + + //set the field slice to a scalar field + //Uf.scalarField = true; + + //initialize the GPU arrays + //Uf.init_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((Uf.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (Uf.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //if we are computing a plane wave, call the gpuScalarUfp function + if(planeWave) + { + gpuScalarUfp<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1]); + } + //otherwise compute the condenser info and create a focused field + else + { + //pre-compute the cosine of the obscuration and objective angles + ptype cosAlpha = cos(asin(condenser[0])); + ptype cosBeta = cos(asin(condenser[1])); + //compute the scalar Uf field (this will be in the x_hat channel of Uf) + gpuScalarUf<<>>(Uf.x_hat, k, 2 * PI / lambda, focus, A, pos, Uf.R[0], Uf.R[1], cosAlpha, cosBeta, m); + } + + //float t = gpuStopTimer(); + //std::cout<<"Scalar Uf Time: "< +#include "rts/cuda_handle_error.h" +#include "rts/cuda_timer.h" + +__device__ bsComplex calc_Us(ptype kd, ptype cos_theta, int Nl, bsComplex* B) +{ + //initialize the spherical Bessel functions + ptype j[2]; + rts::init_sbesselj(kd, j); + ptype y[2]; + rts::init_sbessely(kd, y); + + //initialize the Legendre polynomial + ptype P[2]; + rts::init_legendre(cos_theta, P[0], P[1]); + + //initialize the spherical Hankel function + bsComplex h((ptype)0, (ptype)0); + + //initialize the result + bsComplex Us((ptype)0, (ptype)0); + + //for each order up to Nl + for(int l=0; l<=Nl; l++) + { + if(l == 0) + { + h.r = j[0]; + h.i = y[0]; + Us += B[0] * h * P[0]; + } + else + { + //shift the bessel functions and legendre polynomials + if(l > 1) + { + rts::shift_legendre(l, cos_theta, P[0], P[1]); + rts::shift_sbesselj(l, kd, j); + rts::shift_sbessely(l, kd, y); + } + + h.r = j[1]; + h.i = y[1]; + Us += B[l] * h * P[1]; + + + } + } + return Us; +} + +__device__ bsComplex calc_Ui(bsComplex knd, ptype cos_theta, int Nl, bsComplex* A) +{ + //calculate the internal field of a sphere + + bsComplex Ui((ptype)0, (ptype)0); + + //deal with rtsPoints near zero + if(real(knd) < EPSILON_FLOAT) + { + //for(int l=0; l(knd, j); + + //initialize the Legendre polynomial + ptype P[2]; + rts::init_legendre(cos_theta, P[0], P[1]); + + //for each order up to Nl + for(int l=0; l<=Nl; l++) + { + if(l == 0) + { + Ui += A[0] * j[0] * P[0]; + } + else + { + //shift the bessel functions and legendre polynomials + if(l > 1) + { + rts::shift_legendre(l, cos_theta, P[0], P[1]); + rts::shift_sbesselj(l, knd, j); + } + + Ui += A[l] * j[1] * P[1]; + + + } + } + return Ui; +} + +__global__ void gpuScalarUsp(bsComplex* Us, bsVector* k, int nk, ptype kmag, bsPoint f, bsPoint ps, ptype a, bsComplex n, bsComplex* Beta, bsComplex* Alpha, int Nl, ptype A, bsRect ABCD, int uR, int vR) +{ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / (uR); + ptype v = (ptype)iv / (vR); + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r = p - ps; + ptype d = r.len(); + + bsComplex sumUs(0, 0); + //for each plane wave in the wave list + for(int iw = 0; iw < nk; iw++) + { + //normalize the direction vectors and find their inner product + r = r.norm(); + ptype cos_theta = k[iw].dot(r); + + //compute the phase factor for spheres that are not at the origin + bsVector c = ps - f; + bsComplex phase = exp(bsComplex(0, kmag * k[iw].dot(c))); + + //compute the internal field if we are inside a sphere + if(d <= a) + { + bsComplex knd = kmag * d * n; + sumUs += (1.0/nk) * A * phase * calc_Ui(knd, cos_theta, Nl, Alpha); + } + //otherwise compute the scattered field + else + { + //compute the argument for the spherical Hankel function + ptype kd = kmag * d; + sumUs += (1.0/nk) * A * phase * calc_Us(kd, cos_theta, Nl, Beta); + } + + } + + Us[i] += sumUs; + + +} + +void nearfieldStruct::scalarUs() +{ + //get the number of spheres + int nSpheres = sVector.size(); + + //if there are no spheres, nothing to do here + if(nSpheres == 0) + return; + + //time the calculation of the focused field + //gpuStartTimer(); + + //clear the scattered field + U.clear_gpu(); + + //create one thread for each pixel of the field slice + dim3 dimBlock(SQRT_BLOCK, SQRT_BLOCK); + dim3 dimGrid((U.R[0] + SQRT_BLOCK -1)/SQRT_BLOCK, (U.R[1] + SQRT_BLOCK - 1)/SQRT_BLOCK); + + //for each sphere + int Nl; + for(int s = 0; s>>(U.x_hat, + gpuk, + 1, + 2 * PI / lambda, + focus, + sVector[s].p, + sVector[s].a, + sVector[s].n, + gpuB, + gpuA, + Nl, + A, + pos, + U.R[0], + U.R[1]); + HANDLE_ERROR(cudaFree(gpuk)); + } + //otherwise copy all of the monte-carlo samples to the GPU and compute + else + { + HANDLE_ERROR(cudaMalloc( (void**)&gpuk, sizeof(bsVector) * inWaves.size() )); + HANDLE_ERROR(cudaMemcpy( gpuk, &inWaves[0], sizeof(bsVector) * inWaves.size(), cudaMemcpyHostToDevice)); + + //compute the amplitude that makes it through the condenser + ptype subA = 2 * PI * A * ( (1 - cos(asin(condenser[1]))) - (1 - cos(asin(condenser[0]))) ); + + gpuScalarUsp<<>>(U.x_hat, + gpuk, + inWaves.size(), + 2 * PI / lambda, + focus, + sVector[s].p, + sVector[s].a, + sVector[s].n, + gpuB, + gpuA, + Nl, + subA, + pos, + U.R[0], + U.R[1]); + HANDLE_ERROR(cudaFree(gpuk)); + + + } + + //free memory for scattering coefficients + HANDLE_ERROR(cudaFree(gpuA)); + HANDLE_ERROR(cudaFree(gpuB)); + } + + + + //float t = gpuStopTimer(); + //std::cout<<"Scalar Us Time: "< +#include "rts/cuda_handle_error.h" +#include "rts/cuda_timer.h" + +__global__ void gpuScalarUsp(bsComplex* Ufx, bsComplex* Ufy, bsComplex* Ufz, + bsComplex* Ux, bsComplex* Uy, bsComplex* Uz, + bsPoint* ps, ptype* as, int ns, bsRect ABCD, int uR, int vR) +{ + + //get the current coordinate in the plane slice + int iu = blockIdx.x * blockDim.x + threadIdx.x; + int iv = blockIdx.y * blockDim.y + threadIdx.y; + + //make sure that the thread indices are in-bounds + if(iu >= uR || iv >= vR) return; + + //compute the index (easier access to the scalar field array) + int i = iv*uR + iu; + + //compute the parameters for u and v + ptype u = (ptype)iu / uR; + ptype v = (ptype)iv / vR; + + //get the rtsPoint in world space and then the r vector + bsPoint p = ABCD(u, v); + bsVector r; + ptype d; + + //if we are inside of a sphere, return + for(int is=0; is>>(Uf.x_hat, + Uf.y_hat, + Uf.z_hat, + U.x_hat, + U.y_hat, + U.z_hat, + gpu_p, + gpu_a, + nSpheres, + pos, + U.R[0], + U.R[1]); + + + + //free sphere lists + HANDLE_ERROR(cudaFree(gpu_p)); + HANDLE_ERROR(cudaFree(gpu_a)); + + //float t = gpuStopTimer(); + //std::cout<<"Add Us Time: "< +#include +#include +#include +using namespace std; + +#include +namespace po = boost::program_options; + +static void loadSpheres(string sphereList) +{ + /*This function loads a list of sphere given in the string sphereList + The format is: + x y z a m + where + x, y, z = sphere position (required) + a = sphere radius (required) + m = material ID (optional) */ + + std::stringstream ss(sphereList); + + while(!ss.eof()) + { + //create a new sphere + sphere newS; + + //get the sphere data + ss>>newS.p[0]; + ss>>newS.p[1]; + ss>>newS.p[2]; + ss>>newS.a; + + if(ss.peek() != '\n') + ss>>newS.iMaterial; + + //add the new sphere to the sphere vector + SCOPE->nf.sVector.push_back(newS); + + //ignore the rest of the line + ss.ignore(std::numeric_limits::max(), '\n'); + + //check out the next element (this should set the EOF error flag) + ss.peek(); + } + + + +} + +static void loadSpheres(po::variables_map vm) +{ + //if a files are specified + if(vm.count("sphere-file")) + { + cout<<"Sphere files detected."< filenames = vm["sphere-file"].as< vector >(); + //load each file + for(int iS=0; iS(ifs)), std::istreambuf_iterator()); + + //load the list of spheres from a string + loadSpheres(instr); + } + } + + //load the sphere from the command line + if(vm.count("sx") || vm.count("sy") || vm.count("sz") || vm.count("s")) + { + //create a new sphere + sphere newS; + + //set defaults + if(vm.count("sx")) + newS.p[0] = vm["sx"].as(); + else + newS.p[0] = DEFAULT_SPHERE_X; + + + if(vm.count("sy")) + newS.p[1] = vm["sy"].as(); + else + newS.p[1] = DEFAULT_SPHERE_Y; + + if(vm.count("sz")) + newS.p[2] = vm["sz"].as(); + else + newS.p[2] = DEFAULT_SPHERE_Z; + + if(vm.count("radius")) + newS.a = vm["radius"].as(); + else + newS.a = DEFAULT_SPHERE_A; + + //add the sphere to the sphere vector + SCOPE->nf.sVector.push_back(newS); + + } +} + +static void loadMaterials(po::variables_map vm) +{ + //if materials are specified at the command line + if(vm.count("materials")) + { + vector matVec = vm["materials"].as< vector >(); + if(matVec.size() %2 != 0) + { + cout<<"BIMSim Error: materials must be specified in n, k pairs"< newM(vm["lambda"].as(), matVec[i], matVec[i+1]); + SCOPE->nf.mVector.push_back(newM); + } + } + else + { + //add the command line material as the default (material 0) + rts::material newM(vm["lambda"].as(), vm["n"].as(), vm["k"].as()); + SCOPE->nf.mVector.push_back(newM); + } + + //if file names are specified, load the materials + if(vm.count("material-file")) + { + vector filenames = vm["material-file"].as< vector >(); + for(int i=0; i(ifs)), std::istreambuf_iterator()); + + //load the list of spheres from a string + rts::material newM; + newM.fromStr(instr, ""); + SCOPE->nf.mVector.push_back(newM); + } + } + +} + +static void loadNearfieldParams(po::variables_map vm) +{ + //test to see if we are simulating a plane wave + bool planeWave = DEFAULT_PLANEWAVE; + if(vm.count("plane-wave")) + planeWave = !planeWave; + SCOPE->nf.planeWave = planeWave; + + //get the wavelength + SCOPE->nf.lambda = vm["lambda"].as(); + + //get the incident field amplitude + SCOPE->nf.A = vm["amplitude"].as(); + + //get the condenser parameters + SCOPE->nf.condenser[0] = vm["condenser-min"].as(); + SCOPE->nf.condenser[1] = vm["condenser-max"].as(); + + + //get the focal rtsPoint position + SCOPE->nf.focus[0] = vm["fx"].as(); + SCOPE->nf.focus[1] = vm["fy"].as(); + SCOPE->nf.focus[2] = vm["fz"].as(); + + //get the incident light direction (k-vector) + bsVector spherical; + spherical[0] = 1.0; + spherical[1] = vm["theta"].as(); + spherical[2] = vm["phi"].as(); + SCOPE->nf.k = spherical.sph2cart(); + + + //incident field order + SCOPE->nf.m = vm["field-order"].as(); + + //number of Monte-Carlo samples + SCOPE->nf.nWaves = vm["samples"].as(); + + + +} + +static void loadSliceParams(po::variables_map vm) +{ + //parameters for the sample plane + + + //set the default values for the slice position and orientation + bsPoint pMin(vm["plane-min-x"].as(), vm["plane-min-y"].as(), vm["plane-min-z"].as()); + bsPoint pMax(vm["plane-max-x"].as(), vm["plane-max-y"].as(), vm["plane-max-z"].as()); + bsVector normal(vm["plane-norm-x"].as(), vm["plane-norm-y"].as(), vm["plane-norm-z"].as()); + SCOPE->setPos(pMin, pMax, normal); + + //resolution + SCOPE->setRes(vm["resolution"].as(), + vm["resolution"].as(), + vm["padding"].as(), + vm["supersample"].as()); + + + + + + SCOPE->setNearfield(); + + + +} + +static void loadMicroscopeParams(po::variables_map vm) +{ + //objective + SCOPE->objective[0] = vm["objective-min"].as(); + SCOPE->objective[1] = vm["objective-max"].as(); + + + + + +} + +static void loadOutputParams(po::variables_map vm) +{ + + //image parameters + //component of the field to be saved + std::string fieldStr; + fieldStr = vm["output-type"].as(); + + if(fieldStr == "magnitude") + gFileOut.field = fileoutStruct::fieldMag; + else if(fieldStr == "intensity") + gFileOut.field = fileoutStruct::fieldIntensity; + else if(fieldStr == "polarization") + gFileOut.field = fileoutStruct::fieldPolar; + else if(fieldStr == "imaginary") + gFileOut.field = fileoutStruct::fieldImag; + else if(fieldStr == "real") + gFileOut.field = fileoutStruct::fieldReal; + else if(fieldStr == "angular-spectrum") + gFileOut.field = fileoutStruct::fieldAngularSpectrum; + + + //image file names + gFileOut.intFile = vm["intensity"].as(); + gFileOut.absFile = vm["absorbance"].as(); + gFileOut.transFile = vm["transmittance"].as(); + gFileOut.nearFile = vm["near-field"].as(); + gFileOut.farFile = vm["far-field"].as(); + + //colormap + std::string cmapStr; + cmapStr = vm["colormap"].as(); + if(cmapStr == "brewer") + gFileOut.colormap = rts::colormap::cmBrewer; + else if(cmapStr == "gray") + gFileOut.colormap = rts::colormap::cmGrayscale; + else + cout<<"color-map value not recognized (using default): "<nf.toStr(); + +} + +static void SetOptions(po::options_description &desc) +{ + desc.add_options() + ("help,h", "prints this help") + ("plane-wave,P", "simulates an incident plane wave") + ("intensity,I", po::value()->default_value(DEFAULT_INTENSITY_FILE), "output measured intensity (filename)") + ("absorbance,A", po::value()->default_value(DEFAULT_ABSORBANCE_FILE), "output measured absorbance (filename)") + ("transmittance,T", po::value()->default_value(DEFAULT_TRANSMITTANCE_FILE), "output measured transmittance (filename)") + ("far-field,F", po::value()->default_value(DEFAULT_FAR_FILE), "output far-field at detector (filename)") + ("near-field,N", po::value()->default_value(DEFAULT_NEAR_FILE), "output field at focal plane (filename)") + ("extended-source,X", po::value()->default_value(DEFAULT_EXTENDED_SOURCE), "image of source at focus (filename)") + //("sx,x", po::value()->default_value(DEFAULT_SPHERE_X), "sphere coordinates") + //("sy,y", po::value()->default_value(DEFAULT_SPHERE_Y)) + //("sz,z", po::value()->default_value(DEFAULT_SPHERE_Z)) + ("sx,x", po::value(), "sphere coordinates") + ("sy,y", po::value()) + ("sz,z", po::value()) + ("radius,r", po::value()->default_value(DEFAULT_SPHERE_A), "sphere radius") + ("samples,s", po::value()->default_value(DEFAULT_SAMPLES), "Monte-Carlo samples used to compute Us") + ("sphere-file,S", po::value< vector >()->multitoken(), "sphere file:\n [x y z radius material]") + ("amplitude,a", po::value()->default_value(DEFAULT_AMPLITUDE), "incident field amplitude") + ("n,n", po::value()->default_value(DEFAULT_N, "1.4"), "sphere phase speed") + ("k,k", po::value()->default_value(DEFAULT_K), "sphere absorption coefficient") + ("material-file,M", po::value< vector >()->multitoken(), "material file:\n [lambda n k]") + ("materials", po::value< vector >()->multitoken(), "materials specified using n, k pairs:\n ex. --materials n1 k1 n2 k2\n (if used --n and --k are ignored)") + ("lambda,l", po::value()->default_value(DEFAULT_LAMBDA), "incident wavelength") + ("theta,t", po::value()->default_value(DEFAULT_K_THETA), "light direction (polar coords)") + ("phi,p", po::value()->default_value(DEFAULT_K_PHI)) + ("fx", po::value()->default_value(DEFAULT_FOCUS_X), "incident focal point") + ("fy", po::value()->default_value(DEFAULT_FOCUS_Y)) + ("fz", po::value()->default_value(DEFAULT_FOCUS_Z)) + ("condenser-max,C", po::value()->default_value(DEFAULT_CONDENSER_MAX), "condenser numerical aperature") + ("condenser-min,c", po::value()->default_value(DEFAULT_CONDENSER_MIN), "condenser obscuration NA") + ("objective-max,O", po::value()->default_value(DEFAULT_OBJECTIVE_MAX), "objective numerical aperature") + ("objective-min,o", po::value()->default_value(DEFAULT_OBJECTIVE_MIN), "objective obscuration NA") + ("field-order", po::value()->default_value(DEFAULT_FIELD_ORDER), "order of the incident field") + ("output-type,f", po::value()->default_value(DEFAULT_FIELD_TYPE), "output field value:\n magnitude, polarization, real, imaginary, angular-spectrum") + ("resolution,R", po::value()->default_value(DEFAULT_SLICE_RES), "resolution of the detector") + ("padding,d", po::value()->default_value(DEFAULT_PADDING), "FFT padding for the objective bandpass") + ("supersample", po::value()->default_value(DEFAULT_SUPERSAMPLE), "super-sampling rate for the detector field") + ("colormap", po::value()->default_value(DEFAULT_COLORMAP), "colormap: gray, brewer") + ("append", "append result to an existing file\n (binary files only)") + ("plane-min-x,u", po::value()->default_value(DEFAULT_SLICE_MIN_X), "lower-left corner of the field slice") + ("plane-min-y,v", po::value()->default_value(DEFAULT_SLICE_MIN_Y)) + ("plane-min-z,w", po::value()->default_value(DEFAULT_SLICE_MIN_Z)) + ("plane-max-x,U", po::value()->default_value(DEFAULT_SLICE_MAX_X), "upper-right corner of the field slice") + ("plane-max-y,V", po::value()->default_value(DEFAULT_SLICE_MAX_Y)) + ("plane-max-z,W", po::value()->default_value(DEFAULT_SLICE_MAX_Z)) + ("plane-norm-x", po::value()->default_value(DEFAULT_SLICE_NORM_X), "field slice normal") + ("plane-norm-y", po::value()->default_value(DEFAULT_SLICE_NORM_Y)) + ("plane-norm-z", po::value()->default_value(DEFAULT_SLICE_NORM_Z)); +} + +static void LoadParameters(int argc, char *argv[]) +{ + //create an option description + po::options_description desc("Allowed options"); + + //fill it with options + SetOptions(desc); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //display help and exit + if(vm.count("help")) + { + cout<() != "") + { + //load the point sources + string filename = vm["extended-source"].as(); + SCOPE->LoadExtendedSource(filename); + + } + + + + + +} diff --git a/rtsEnvi.h b/rtsEnvi.h new file mode 100644 index 0000000..250526c --- /dev/null +++ b/rtsEnvi.h @@ -0,0 +1,610 @@ +#ifndef RTS_ENVI_H +#define RTS_ENVI_H + +#include +#include +#include +#include + +/* This is a class for reading and writing ENVI binary files. This class will be updated as needed. + +What this class CAN currently do: + *) Write band images to a BSQ file. + *) Append band images to a BSQ file. + +*/ + +//information from an ENVI header file +//A good resource can be found here: http://www.exelisvis.com/docs/enviheaderfiles.html +struct EnviHeader +{ + std::string name; + + std::string description; + + unsigned int samples; //x-axis + unsigned int lines; //y-axis + unsigned int bands; //spectral axis + unsigned int header_offset; //header offset for binary file (in bytes) + std::string file_type; //should be "ENVI Standard" + unsigned int data_type; //data representation; common value is 4 (32-bit float) + + enum interleaveType {BIP, BIL, BSQ}; //bip = Z,X,Y; bil = X,Z,Y; bsq = X,Y,Z + interleaveType interleave; + + std::string sensor_type; //not really used + + enum endianType {endianLittle, endianBig}; + endianType byte_order; //little = least significant bit first (most systems) + + double x_start, y_start; //coordinates of the upper-left corner of the image + std::string wavelength_units; //stored wavelength units + std::string z_plot_titles[2]; + + double pixel_size[2]; //pixel size along X and Y + + std::vector band_names; //name for each band in the image + std::vector wavelength; //wavelength for each band + + EnviHeader() + { + name = ""; + + //specify default values for a new or empty ENVI file + samples = 0; + lines = 0; + bands = 0; + header_offset = 0; + data_type = 4; + interleave = BSQ; + byte_order = endianLittle; + x_start = y_start = 0; + pixel_size[0] = pixel_size[1] = 1; + + //strings + file_type = "ENVI Standard"; + sensor_type = "Unknown"; + wavelength_units = "Unknown"; + z_plot_titles[0] = z_plot_titles[1] = "Unknown"; + } + + std::string trim(std::string line) + { + //trims whitespace from the beginning and end of line + int start_i, end_i; + for(start_i=0; start_i < line.length(); start_i++) + if(line[start_i] != 32) + { + break; + } + + for(end_i = line.length()-1; end_i >= start_i; end_i--) + if(line[end_i] != ' ') + { + break; + } + + return line.substr(start_i, end_i - start_i+1); + } + + + std::string get_token(std::string line) + { + //returns a variable name; in this case we look for the '=' sign + size_t i = line.find_first_of('='); + + std::string result; + if(i != std::string::npos) + result = trim(line.substr(0, i-1)); + + return result; + } + + std::string get_data_str(std::string line) + { + size_t i = line.find_first_of('='); + + std::string result; + if(i != std::string::npos) + result = trim(line.substr(i+1)); + else + { + std::cout<<"ENVI Header error - data not found for token: "< get_string_seq(std::string token, std::string sequence) + { + //this function returns a sequence of comma-delimited strings + std::vector result; + + std::string entry; + size_t i; + do + { + i = sequence.find_first_of(','); + entry = sequence.substr(0, i); + sequence = sequence.substr(i+1); + result.push_back(trim(entry)); + }while(i != std::string::npos); + + return result; + } + + std::vector get_double_seq(std::string token, std::string sequence) + { + //this function returns a sequence of comma-delimited strings + std::vector result; + + std::string entry; + size_t i; + do + { + i = sequence.find_first_of(','); + entry = sequence.substr(0, i); + sequence = sequence.substr(i+1); + result.push_back(atof(entry.c_str())); + }while(i != std::string::npos); + + return result; + } + + void load(std::string filename) + { + name = filename; + + //open the header file + std::ifstream file(name.c_str()); + + if(!file) + { + std::cout<<"ENVI header file not found: "< pxsize = get_double_seq(token, string_sequence); + pixel_size[0] = pxsize[0]; + pixel_size[1] = pxsize[1]; + } + else if(token == "z plot titles") + { + std::string string_sequence = get_brace_str(token, line, file); + std::vector titles = get_string_seq(token, string_sequence); + z_plot_titles[0] = titles[0]; + z_plot_titles[1] = titles[1]; + } + + else if(token == "samples") + samples = atoi(get_data_str(line).c_str()); + else if(token == "lines") + lines = atoi(get_data_str(line).c_str()); + else if(token == "bands") + bands = atoi(get_data_str(line).c_str()); + else if(token == "header offset") + header_offset = atoi(get_data_str(line).c_str()); + else if(token == "file type") + file_type = get_data_str(line); + else if(token == "data type") + data_type = atoi(get_data_str(line).c_str()); + else if(token == "interleave") + { + std::string interleave_str = get_data_str(line); + if(interleave_str == "bip") + interleave = BIP; + else if(interleave_str == "bil") + interleave = BIL; + else if(interleave_str == "bsq") + interleave = BSQ; + } + else if(token == "sensor type") + sensor_type = get_data_str(line); + else if(token == "byte order") + byte_order = (endianType)atoi(get_data_str(line).c_str()); + else if(token == "x start") + x_start = atof(get_data_str(line).c_str()); + else if(token == "y start") + y_start = atof(get_data_str(line).c_str()); + else if(token == "wavelength units") + wavelength_units = get_data_str(line); + } + + //close the file + file.close(); + } + + void save(std::string filename) + { + //open a file + std::ofstream outfile(filename.c_str()); + + //write the ENVI type identifier + outfile<<"ENVI"< 0) + { + outfile<<"band names = {"<(S, filename, R[0], R[1], vmin, vmax, cmap); +} + +void scalarslice::toImage(std::string filename, bool positive, rts::colormap::colormapType cmap) +{ + cublasStatus_t stat; + cublasHandle_t handle; + + //create a CUBLAS handle + stat = cublasCreate(&handle); + if(stat != CUBLAS_STATUS_SUCCESS) + { + std::cout<<"CUBLAS Error: initialization failed"< +#include + +using namespace rts; +using namespace std; + +int cbessjyva(double v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); + +int cbessjyva_sph(int v,complex z,double &vm,complex*cjv, + complex*cyv,complex*cjvp,complex*cyvp); + +void sphere::calcCoeff(ptype lambda, rtsComplex ri) +{ + /* These calculations are done at high-precision on the CPU + since they are only required once for each wavelength. + + Input: + + lambda = wavelength of the incident field + n = complex refractive index of the sphere + */ + + //clear the previous coefficients + A.clear(); + B.clear(); + + //convert to an std complex value + complex nc(ri.real(), ri.imag()); + n = ri; + + //compute the magnitude of the k vector + double k = 2 * PI / lambda; + complex kna = nc * k * (double)a; + + //compute the arguments k*a and k*n*a + complex ka(k * a, 0.0); + + //allocate space for the Bessel functions of the first and second kind (and derivatives) + int bytes = sizeof(complex) * (Nl + 1); + complex* cjv_ka = (complex*)malloc(bytes); + complex* cyv_ka = (complex*)malloc(bytes); + complex* cjvp_ka = (complex*)malloc(bytes); + complex* cyvp_ka = (complex*)malloc(bytes); + complex* cjv_kna = (complex*)malloc(bytes); + complex* cyv_kna = (complex*)malloc(bytes); + complex* cjvp_kna = (complex*)malloc(bytes); + complex* cyvp_kna = (complex*)malloc(bytes); + + //allocate space for the spherical Hankel functions and derivative + complex* chv_ka = (complex*)malloc(bytes); + complex* chvp_ka = (complex*)malloc(bytes); + + //compute the bessel functions using the CPU-based algorithm + double vm; + cbessjyva_sph(Nl, ka, vm, cjv_ka, cyv_ka, cjvp_ka, cyvp_ka); + cbessjyva_sph(Nl, kna, vm, cjv_kna, cyv_kna, cjvp_kna, cyvp_kna); + + + //cout<<"Begin Sphere---------"< +#include +#include +#include +#include "fieldslice.h" +#include "dataTypes.h" + + + +struct sphere +{ + + //sphere position + bsPoint p; + + //sphere radius + ptype a; + + //sphere material index + int iMaterial; + + //rtsPointer to the scattered field produced by a plane wave + // this is a function of cos(theta) and |r| (distance from sphere center) + //fieldslice surface; + + //resolution of the scattered field + int thetaR, rR; + + //sphere order + int Nl; + + //refractive index for the current lambda + bsComplex n; + + //external scattering coefficients + std::vector B; + + //internal scattering coefficients + std::vector A; + + sphere(ptype x = 0.0f, ptype y = 0.0f, ptype z = 0.0f, ptype a = 0.0f, int m = 0, int ang = 128) + { + this->p = bsPoint(x, y, z); + this->a = a; + this->iMaterial = m; + + //surface = fieldslice(ang, ang/2); + } + + std::string toStr() + { + std::stringstream ss; + + ss< n); + + + + +}; + +#endif diff --git a/warnings.cpp b/warnings.cpp new file mode 100644 index 0000000..f9f2949 --- /dev/null +++ b/warnings.cpp @@ -0,0 +1,19 @@ +#include "warnings.h" + +void TestSimulation(nearfieldStruct* nf, microscopeStruct* scope, fileoutStruct* fout) +{ +/* //This function tests the simulation input and outputs any warnings or errors + + + //if the output is a detector image, the type should be intensity or absorbance + if(fout->source == fileoutStruct::imageDetector) + { + if(fout->field != fileoutStruct::fieldIntensity || fout->field != fileoutStruct::fieldAbsorbance) + { + cout<<"The detector only supports intensity and absorbance measurements. Defaulting to intensity."<field = fileoutStruct::fieldIntensity; + } + } + + */ +} diff --git a/warnings.h b/warnings.h new file mode 100644 index 0000000..026685b --- /dev/null +++ b/warnings.h @@ -0,0 +1,5 @@ +#include "nearfield.h" +#include "microscope.h" +#include "fileout.h" + +void TestSimulation(nearfieldStruct* nf, microscopeStruct* scope, fileoutStruct* fout); diff --git a/win32/QtCore4.dll b/win32/QtCore4.dll new file mode 100644 index 0000000..09f5f70 Binary files /dev/null and b/win32/QtCore4.dll differ diff --git a/win32/QtGui4.dll b/win32/QtGui4.dll new file mode 100644 index 0000000..d5f18c2 Binary files /dev/null and b/win32/QtGui4.dll differ diff --git a/win32/bimsim.exe b/win32/bimsim.exe new file mode 100644 index 0000000..78967fd Binary files /dev/null and b/win32/bimsim.exe differ diff --git a/win32/cublas32_50_35.dll b/win32/cublas32_50_35.dll new file mode 100644 index 0000000..d2556d5 Binary files /dev/null and b/win32/cublas32_50_35.dll differ diff --git a/win32/cudart32_50_35.dll b/win32/cudart32_50_35.dll new file mode 100644 index 0000000..deca212 Binary files /dev/null and b/win32/cudart32_50_35.dll differ diff --git a/win32/cufft32_50_35.dll b/win32/cufft32_50_35.dll new file mode 100644 index 0000000..da4b5ca Binary files /dev/null and b/win32/cufft32_50_35.dll differ diff --git a/win32/einstein.jpg b/win32/einstein.jpg new file mode 100644 index 0000000..97b073d Binary files /dev/null and b/win32/einstein.jpg differ diff --git a/win32/mc_spheres.txt b/win32/mc_spheres.txt new file mode 100644 index 0000000..de3f57d --- /dev/null +++ b/win32/mc_spheres.txt @@ -0,0 +1,3 @@ +-3 0 -1 1 0 +0 0 0 1 0 +3 0 1 1 0 \ No newline at end of file diff --git a/win32/spheres.txt b/win32/spheres.txt new file mode 100644 index 0000000..9c8418a --- /dev/null +++ b/win32/spheres.txt @@ -0,0 +1,4 @@ +-3 0 0 1 0 +0 3 0 1 1 +3 0 0 1 2 +0 -3 0 1 3 -- libgit2 0.21.4