Commit 39a92d0390bbd9240b2493d5a98afd9f17f0633b

Authored by Pavel Govyadinov
2 parents 8823488b 9b563709

Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib into Graph

cmake/FindFANN.cmake 0 → 100644
  1 +#
  2 +# Windows users: define the FANN_PATH environment variable to point
  3 +# to the directory containing:
  4 +# include/fann.h
  5 +# lib/*fann.lib
  6 +
  7 +
  8 +# FANN_FOUND - system has fann
  9 +# FANN_INCLUDE_DIRS - the fann include directory
  10 +# FANN_LIBRARIES - Link these to use fann
  11 +# FANN_DEFINITIONS - Compiler switches required for using fann
  12 +#
  13 +
  14 +if(FANN_LIBRARIES AND FANN_INCLUDE_DIRS)
  15 + set(FANN_FOUND TRUE)
  16 +else()
  17 + find_path(FANN_INCLUDE_DIR
  18 + NAMES
  19 + fann.h
  20 + PATHS
  21 + $ENV{FANN_PATH}/include
  22 + ${FANN_DIR}/include
  23 + /usr/include
  24 + /usr/local/include
  25 + /opt/local/include
  26 + /sw/include
  27 + )
  28 +
  29 + set( _libraries fann doublefann fixedfann floatfann )
  30 +
  31 + foreach( _lib ${_libraries} )
  32 + string( TOUPPER ${_lib} _name )
  33 +
  34 + find_library(${_name}_LIBRARY
  35 + NAMES
  36 + ${_lib}
  37 + PATHS
  38 + $ENV{FANN_PATH}/lib
  39 + ${FANN_DIR}/lib
  40 + /usr/lib
  41 + /usr/local/lib
  42 + /opt/local/lib
  43 + /sw/lib
  44 + )
  45 +
  46 + endforeach()
  47 +
  48 +
  49 + set(FANN_INCLUDE_DIRS
  50 + ${FANN_INCLUDE_DIR}
  51 + )
  52 +
  53 + set(FANN_LIBRARIES
  54 + ${FANN_LIBRARIES}
  55 + ${FANN_LIBRARY}
  56 + ${DOUBLEFANN_LIBRARY}
  57 + ${FIXEDFANN_LIBRARY}
  58 + ${FLOATFANN_LIBRARY}
  59 + )
  60 +
  61 + if( UNIX )
  62 + set( FANN_LIBRARIES ${FANN_LIBRARIES} m )
  63 + endif()
  64 +
  65 + if(FANN_INCLUDE_DIRS AND FANN_LIBRARIES)
  66 + set(FANN_FOUND TRUE)
  67 + endif()
  68 +
  69 + if(FANN_FOUND)
  70 + if(NOT FANN_FIND_QUIETLY)
  71 + message(STATUS "Found FANN:")
  72 + message(STATUS "FANN_INCLUDE_DIRS: ${FANN_INCLUDE_DIRS}")
  73 + message(STATUS "FANN_LIBRARIES: ${FANN_LIBRARIES}")
  74 + endif()
  75 + else()
  76 + if(FANN_FIND_REQUIRED)
  77 + message(FATAL_ERROR "Could not find FANN")
  78 + endif()
  79 + endif()
  80 +
  81 + mark_as_advanced(FANN_INCLUDE_DIRS FANN_LIBRARIES)
  82 +endif()
... ...
cmake/FindGLEW.cmake 0 → 100644
  1 +# Copyright (c) 2012-2016 DreamWorks Animation LLC
  2 +#
  3 +# All rights reserved. This software is distributed under the
  4 +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
  5 +#
  6 +# Redistributions of source code must retain the above copyright
  7 +# and license notice and the following restrictions and disclaimer.
  8 +#
  9 +# * Neither the name of DreamWorks Animation nor the names of
  10 +# its contributors may be used to endorse or promote products derived
  11 +# from this software without specific prior written permission.
  12 +#
  13 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  14 +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  15 +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  16 +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  17 +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL,
  18 +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  19 +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20 +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21 +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22 +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  23 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24 +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE
  25 +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00.
  26 +#
  27 +
  28 +#-*-cmake-*-
  29 +# - Find GLEW
  30 +#
  31 +# Author : Nicholas Yue yue.nicholas@gmail.com
  32 +#
  33 +# This auxiliary CMake file helps find the GLEW headers and libraries
  34 +#
  35 +# GLEW_FOUND set if Glew is found.
  36 +# GLEW_INCLUDE_DIR GLEW's include directory
  37 +# GLEW_GLEW_LIBRARY GLEW library
  38 +# GLEW_GLEWmx_LIBRARY GLEWmx library (Multiple Rendering Context)
  39 +
  40 +FIND_PACKAGE ( PackageHandleStandardArgs )
  41 +
  42 +FIND_PATH( GLEW_LOCATION include/GL/glew.h
  43 + "$ENV{GLEW_ROOT}"
  44 + NO_DEFAULT_PATH
  45 + NO_SYSTEM_ENVIRONMENT_PATH
  46 + )
  47 +
  48 +FIND_PACKAGE_HANDLE_STANDARD_ARGS ( GLEW
  49 + REQUIRED_VARS GLEW_LOCATION
  50 + )
  51 +
  52 +IF ( GLEW_LOCATION )
  53 +
  54 + SET( GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path")
  55 +
  56 + SET ( ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
  57 + IF (GLEW_USE_STATIC_LIBS)
  58 + IF (APPLE)
  59 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
  60 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
  61 + NO_DEFAULT_PATH
  62 + NO_SYSTEM_ENVIRONMENT_PATH
  63 + )
  64 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
  65 + NO_DEFAULT_PATH
  66 + NO_SYSTEM_ENVIRONMENT_PATH
  67 + )
  68 + # MESSAGE ( "APPLE STATIC" )
  69 + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
  70 + ELSEIF (WIN32)
  71 + # Link library
  72 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
  73 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib )
  74 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib )
  75 + ELSE (APPLE)
  76 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
  77 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
  78 + NO_DEFAULT_PATH
  79 + NO_SYSTEM_ENVIRONMENT_PATH
  80 + )
  81 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
  82 + NO_DEFAULT_PATH
  83 + NO_SYSTEM_ENVIRONMENT_PATH
  84 + )
  85 + # MESSAGE ( "LINUX STATIC" )
  86 + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
  87 + ENDIF (APPLE)
  88 + ELSE ()
  89 + IF (APPLE)
  90 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
  91 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib )
  92 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib )
  93 + ELSEIF (WIN32)
  94 + # Link library
  95 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
  96 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib )
  97 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib )
  98 + # Load library
  99 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
  100 + FIND_LIBRARY ( GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin
  101 + NO_DEFAULT_PATH
  102 + NO_SYSTEM_ENVIRONMENT_PATH
  103 + )
  104 + FIND_LIBRARY ( GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin
  105 + NO_DEFAULT_PATH
  106 + NO_SYSTEM_ENVIRONMENT_PATH
  107 + )
  108 + ELSE (APPLE)
  109 + # Unices
  110 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
  111 + NO_DEFAULT_PATH
  112 + NO_SYSTEM_ENVIRONMENT_PATH
  113 + )
  114 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
  115 + NO_DEFAULT_PATH
  116 + NO_SYSTEM_ENVIRONMENT_PATH
  117 + )
  118 + ENDIF (APPLE)
  119 + ENDIF ()
  120 + # MUST reset
  121 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})
  122 +
  123 + SET( GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library")
  124 + SET( GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library")
  125 +
  126 +ENDIF ()
... ...
cmake/FindGLUT.cmake 0 → 100644
  1 +#.rst:
  2 +# FindGLUT
  3 +# --------
  4 +#
  5 +# try to find glut library and include files.
  6 +#
  7 +# IMPORTED Targets
  8 +# ^^^^^^^^^^^^^^^^
  9 +#
  10 +# This module defines the :prop_tgt:`IMPORTED` targets:
  11 +#
  12 +# ``GLUT::GLUT``
  13 +# Defined if the system has GLUT.
  14 +#
  15 +# Result Variables
  16 +# ^^^^^^^^^^^^^^^^
  17 +#
  18 +# This module sets the following variables:
  19 +#
  20 +# ::
  21 +#
  22 +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc.
  23 +# GLUT_LIBRARIES, the libraries to link against
  24 +# GLUT_FOUND, If false, do not try to use GLUT.
  25 +#
  26 +# Also defined, but not for general use are:
  27 +#
  28 +# ::
  29 +#
  30 +# GLUT_glut_LIBRARY = the full path to the glut library.
  31 +# GLUT_Xmu_LIBRARY = the full path to the Xmu library.
  32 +# GLUT_Xi_LIBRARY = the full path to the Xi Library.
  33 +
  34 +#=============================================================================
  35 +# Copyright 2001-2009 Kitware, Inc.
  36 +#
  37 +# Distributed under the OSI-approved BSD License (the "License");
  38 +# see accompanying file Copyright.txt for details.
  39 +#
  40 +# This software is distributed WITHOUT ANY WARRANTY; without even the
  41 +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  42 +# See the License for more information.
  43 +#=============================================================================
  44 +# (To distribute this file outside of CMake, substitute the full
  45 +# License text for the above reference.)
  46 +
  47 +if (WIN32) # Windows: extra search locations come from the GLUT_ROOT_PATH environment variable
  48 + find_path( GLUT_INCLUDE_DIR NAMES GL/glut.h
  49 + PATHS $ENV{GLUT_ROOT_PATH}/include )
  50 +# 64-bit builds accept only freeglut from lib/x64 (NO_DEFAULT_PATH); other builds also try glut/glut32 and the default locations.
  51 + if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  52 + find_library( GLUT_glut_LIBRARY NAMES freeglut
  53 + PATHS
  54 + $ENV{GLUT_ROOT_PATH}/lib/x64
  55 +
  56 + NO_DEFAULT_PATH
  57 + )
  58 + else( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  59 + find_library( GLUT_glut_LIBRARY NAMES glut glut32 freeglut
  60 + PATHS
  61 + ${OPENGL_LIBRARY_DIR}
  62 + $ENV{GLUT_ROOT_PATH}/lib
  63 + )
  64 + endif( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  65 +
  66 +else ()
  67 +# Non-Windows platforms: macOS first, then BeOS and the remaining UNIX-like systems.
  68 + if (APPLE)
  69 + find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
  70 + find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
  71 + find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")
  72 +# Wrap Cocoa in an IMPORTED target (created once) so GLUT::GLUT can link to it later.
  73 + if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
  74 + add_library(GLUT::Cocoa UNKNOWN IMPORTED)
  75 + # Cocoa should always be a Framework, but we check to make sure.
  76 + if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
  77 + set_target_properties(GLUT::Cocoa PROPERTIES
  78 + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
  79 + else()
  80 + set_target_properties(GLUT::Cocoa PROPERTIES
  81 + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}")
  82 + endif()
  83 + endif()
  84 + else ()
  85 +
  86 + if (BEOS)
  87 +# BeOS: fixed header/library directories, consumed by the find_path/find_library calls further down.
  88 + set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
  89 + set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
  90 +
  91 + else()
  92 +# Xi and Xmu libraries; each one that is found gets its own IMPORTED target below.
  93 + find_library( GLUT_Xi_LIBRARY Xi
  94 + /usr/openwin/lib
  95 + )
  96 +
  97 + find_library( GLUT_Xmu_LIBRARY Xmu
  98 + /usr/openwin/lib
  99 + )
  100 +
  101 + if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi)
  102 + add_library(GLUT::Xi UNKNOWN IMPORTED)
  103 + set_target_properties(GLUT::Xi PROPERTIES
  104 + IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}")
  105 + endif()
  106 +
  107 + if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu)
  108 + add_library(GLUT::Xmu UNKNOWN IMPORTED)
  109 + set_target_properties(GLUT::Xmu PROPERTIES
  110 + IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}")
  111 + endif()
  112 +
  113 + endif ()
  114 +# _GLUT_INC_DIR and _GLUT_glut_LIB_DIR are only set in the BeOS branch; elsewhere they expand to nothing.
  115 + find_path( GLUT_INCLUDE_DIR GL/glut.h
  116 + /usr/include/GL
  117 + /usr/openwin/share/include
  118 + /usr/openwin/include
  119 + /opt/graphics/OpenGL/include
  120 + /opt/graphics/OpenGL/contrib/libglut
  121 + ${_GLUT_INC_DIR}
  122 + )
  123 +
  124 + find_library( GLUT_glut_LIBRARY glut
  125 + /usr/openwin/lib
  126 + ${_GLUT_glut_LIB_DIR}
  127 + )
  128 +# Drop the temporary helper variables so they do not leak to the caller.
  129 + unset(_GLUT_INC_DIR)
  130 + unset(_GLUT_glut_LIB_DIR)
  131 +
  132 + endif ()
  133 +
  134 +endif ()
  135 +
  136 +FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)
  137 +
  138 +if (GLUT_FOUND)
  139 + # Is -lXi and -lXmu required on all platforms that have it?
  140 + # If not, we need some way to figure out what platform we are on.
  141 + set( GLUT_LIBRARIES
  142 + ${GLUT_glut_LIBRARY}
  143 + ${GLUT_Xmu_LIBRARY}
  144 + ${GLUT_Xi_LIBRARY}
  145 + ${GLUT_cocoa_LIBRARY}
  146 + )
  147 +# Imported target GLUT::GLUT (created once): carries the include directory, the library location and links to the helper targets.
  148 + if(NOT TARGET GLUT::GLUT)
  149 + add_library(GLUT::GLUT UNKNOWN IMPORTED)
  150 + set_target_properties(GLUT::GLUT PROPERTIES
  151 + INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")
  152 + if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$")
  153 + set_target_properties(GLUT::GLUT PROPERTIES
  154 + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
  155 + else()
  156 + set_target_properties(GLUT::GLUT PROPERTIES
  157 + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
  158 + endif()
  159 +# Link whichever optional helper targets (Xmu, Xi, Cocoa) were created during the search.
  160 + if(TARGET GLUT::Xmu)
  161 + set_property(TARGET GLUT::GLUT APPEND
  162 + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu)
  163 + endif()
  164 +
  165 + if(TARGET GLUT::Xi)
  166 + set_property(TARGET GLUT::GLUT APPEND
  167 + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi)
  168 + endif()
  169 +
  170 + if(TARGET GLUT::Cocoa)
  171 + set_property(TARGET GLUT::GLUT APPEND
  172 + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa)
  173 + endif()
  174 + endif()
  175 +
  176 + #The following deprecated settings are for backwards compatibility with CMake1.4
  177 + set (GLUT_LIBRARY ${GLUT_LIBRARIES})
  178 + set (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
  179 +endif()
  180 +# Hide the search results from the default cmake-gui / ccmake view.
  181 +mark_as_advanced(
  182 + GLUT_INCLUDE_DIR
  183 + GLUT_glut_LIBRARY
  184 + GLUT_Xmu_LIBRARY
  185 + GLUT_Xi_LIBRARY
  186 + )
... ...
cmake/FindSTIM.cmake
1   -include(FindPackageHandleStandardArgs)
2   -
3   -set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
4   -
5   -find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
6   -
7   -if(STIM_FOUND)
8   - set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
9   -endif()
10 1 \ No newline at end of file
  2 +# finds the STIM library (downloads it if it isn't present)
  3 +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
  4 +
  5 +include(FindPackageHandleStandardArgs)
  6 +
  7 +set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
  8 +
  9 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
  10 +
  11 +if(STIM_FOUND)
  12 + set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
  13 +elseif(STIM_FOUND)
  14 + #if the STIM library isn't found, download it
  15 + #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists
  16 + #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
  17 + #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
  18 + #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
  19 + message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
  20 + message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
  21 +endif(STIM_FOUND)
  22 +
  23 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
... ...
matlab/bsq2tensorflow.m 0 → 100644
  1 +function T = bsq2tensorflow(I, n)
  2 +
  3 + sx = size(I, 1);
  4 + sy = size(I, 2) / n; %get the size of the tensor along Y
  5 + sb = size(I, 3);
  6 +
  7 + T = zeros(sx * sy * sb, n); %allocate space for the output matrix
  8 + for i = 0:n-1
  9 + ti = I(:, i * sy + 1 : i * sy + sy, :);
  10 + T(:, i+1) = ti(:);
  11 + end
  12 +end
  13 +
  14 +
0 15 \ No newline at end of file
... ...
matlab/enviLoadRaw.m
1 1 %loads an ENVI file without any manipulation (changing orientation)
  2 +% enviLoadRaw(filename, headername)
2 3 function M = enviLoadRaw(filename, headername)
3 4  
4 5 %if a header isn't provided, assume it's just the filename
... ...
matlab/enviSaveRaw.m
1 1 %saves an ENVI file without any manipulation, assumes (X, Y, S)
  2 +% enviSaveRaw(M, filename, headername)
2 3 function enviSaveRaw(M, filename, headername)
3 4  
4 5 %if a header isn't provided, assume it's just the filename
... ...
matlab/readspe.m 0 → 100644
  1 +% Read images of TIFF, SPE2.2(WinSpec) and SPE3.0(Lightfield)
  2 +% Version: JTL Jun-9-2016
  3 +% ----------------- READ THIS FIRST !!!!! --------------------------------
  4 +% Change the file name to "readspe" before use
  5 +% Example:
  6 +% Z = readspe(filename)
  7 +% Z = readspe(filename,'info')
  8 +% Z = readspe(filename,frame_index)
  9 +% Z = readspe(filename,frame_index,'info')
  10 +% Input:
  11 +% filename - filename string, e.g. 'image.spe'
  12 +% frame_index - frame index, start from 1
  13 +% If you have multiple frames, use a "for" loop
  14 +% 'info' - flag to show file info, i.e. dimension, number of frames, version
  15 +% Output:
  16 +% Z - UINT16 image (convert to double if you need)
  17 +% ------------------------------------------------------------------------
  18 +% Z = readspe (filename,frame_index,'info')
  19 +function Z = readspe (filename,varargin)
  20 +
  21 +if exist(filename) == 2
  22 +
  23 + Nfr = 1; % default read first frame
  24 + if nargin >1
  25 + if isa(varargin{1},'numeric')
  26 + Nfr = varargin{1};
  27 + end
  28 + end
  29 +
  30 + [~,name,ext] = fileparts(filename);
  31 + switch upper(ext)
  32 + case '.TIFF'
  33 + file_ver = 'TIFF';
  34 + Z = imread(filename);
  35 + [Y,X] = size(Z);
  36 + % datatype = class(Z)
  37 +
  38 + case '.SPE'
  39 + fid = fopen(filename);
  40 + I = fread(fid,Inf,'uint8');
  41 + X = double(typecast(uint8(I(43:44)),'uint16'));
  42 + Y = double(typecast(uint8(I(657:658)),'uint16'));
  43 + fr = typecast(uint8(I(1447:1450)),'int32');
  44 + spe_ver = typecast(uint8(I(1993:1996)),'single');
  45 + file_ver = ['SPE ' num2str(spe_ver)];
  46 + datatypeN = typecast(uint8(I(109:110)),'int16');
  47 + switch datatypeN
  48 + case 0 % 32-bit float
  49 + datatype = 'single'; datalength = 4;
  50 + case 1 % 32-bit signed integer
  51 + datatype = 'int32'; datalength = 4;
  52 + case 2 % 16-bit signed integer
  53 + datatype = 'int16'; datalength = 2;
  54 + case 3 % 16-bit unsigned integer
  55 + datatype = 'uint16'; datalength = 2;
  56 + case 8 % 32-bit unsigned integer
  57 + datatype = 'uint32'; datalength = 4;
  58 + end
  59 + % A = I(4101:4100+X*Y*2); % Default read first frame
  60 + A = I(4101+X*Y*datalength*(Nfr-1):4100+X*Y*datalength*Nfr);
  61 + B = typecast(uint8(A),datatype); % important
  62 + Z = reshape(B,X,Y);
  63 + Z = Z';
  64 + fclose(fid);
  65 + end
  66 +
  67 + if nargin >1
  68 + if varargin{end} == 'info'
  69 + display(['X = ' num2str(X)]);
  70 + display(['Y = ' num2str(Y)]);
  71 + if(exist('fr','var'));display(['Number of Frames: ' num2str(fr)]);end;
  72 + display(['File version: ' file_ver]);
  73 + end
  74 + end
  75 +
  76 +elseif exist(filename) == 0
  77 + display('File does not exist!');
  78 +end
0 79 \ No newline at end of file
... ...
matlab/spe2envi.m 0 → 100644
  1 +function spe2envi(filemask, outfile)
  2 +
  3 + filelist = dir(filemask);
  4 +
  5 + %get a list of date numbers
  6 + datenums = cell2mat({filelist.datenum});
  7 +
  8 + %sort the file order based on acquisition time
  9 + [~, id] = sort(datenums);
  10 +
  11 + %get the number of files
  12 + Y = length(id); %size of the image along Y
  13 +
  14 + %load the first file to determine the spectral and X-axis size
  15 + temp = readspe(filelist(1).name);
  16 + X = size(temp, 1); %size of the image along X
  17 + B = size(temp, 2); %number of bands in the image
  18 +
  19 + %create the cube
  20 + I = zeros(X, Y, B);
  21 +
  22 + %for each line
  23 + for y = 1:Y
  24 +
  25 + %read a SPE file
  26 + img = readspe(filelist(id(y)).name);
  27 +
  28 + I(:, y, :) = permute(img, [1 3 2]);
  29 + end
  30 +
  31 + enviSaveRaw(single(I), outfile, [outfile '.hdr']);
  32 +
  33 +
  34 +
... ...
matlab/brewermap.m renamed to matlab/stimBrewerMap.m
matlab/stimLoadAgilent.m 0 → 100644
  1 +%Loads a standard Agilent ResPro binary file
  2 +% stimLoadAgilent(filename)
  3 +function S = stimLoadAgilent(filename)
  4 +
  5 + fid = fopen(filename);
  6 + fseek(fid, 9, 'bof');
  7 + Z = fread(fid, 1, 'uint16');
  8 + fseek(fid, 13, 'cof');
  9 + X = fread(fid, 1, 'uint16');
  10 + Y = fread(fid, 1, 'uint16');
  11 +
  12 + fseek(fid, 1020, 'bof');
  13 +
  14 + S = reshape(fread(fid, [X, Y * Z], 'float32'), [X, Y, Z]);
  15 +
  16 +
0 17 \ No newline at end of file
... ...
matlab/stimROC.m 0 → 100644
  1 +function [TPR, FPR, AUC] = stimROC(C, T)
  2 +%build an ROC curve
  3 +% C - class labels as an array of binary values (1 = true positive)
  4 +% T - threshold used for classification
  5 +
  6 + %sort the thresholds in descending order and get the indices
  7 + [~, I] = sort(T, 'descend');
  8 +
  9 + %sort the class labels in the same order as the thresholds
  10 + Cs = C(I);
  11 +
  12 + %calculate the number of measurements
  13 + M = size(C, 2);
  14 +
  15 + %calculate the number of positives
  16 + P = nnz(C);
  17 +
  18 + %calculate the number of negatives
  19 + N = M - P;
  20 +
  21 + %if all examples are positives or negatives, return a perfect score?
  22 + if P == M
  23 + error('ERROR: no positive observations');
  24 + end
  25 + if P == 0
  26 + error('ERROR: no negative observations');
  27 + end
  28 +
  29 + %allocate space for the ROC curve
  30 + TPR = zeros(1, M);
  31 + FPR = zeros(1, M);
  32 +
  33 +
  34 +
  35 + %calculate the number of inflection points
  36 + ip = 0;
  37 + for i = 2:M
  38 + if Cs(i) ~= Cs(i-1)
  39 + ip = ip + 1;
  40 + end
  41 + end
  42 +
  43 + %initialize the true and false positive rates to zero
  44 + TP = 0;
  45 + FP = 0;
  46 + for i = 1:M
  47 + if Cs(i) == 1
  48 + TP = TP + 1;
  49 + else
  50 + FP = FP + 1;
  51 + end
  52 +
  53 + TPR(i) = TP / P;
  54 + FPR(i) = FP / N;
  55 + end
  56 +
  57 + %calculate the area under the ROC curve
  58 + AUC = 0;
  59 + for i = 2:M
  60 + w = FPR(i) - FPR(i-1);
  61 + h = TPR(i);
  62 + AUC = AUC + w * h;
  63 + end
  64 +
  65 +
  66 +
  67 +
  68 +
  69 +
0 70 \ No newline at end of file
... ...
python/enviProcess.py 0 → 100644
  1 +#!/usr/bin/python3
  2 +
  3 +#import system processes
  4 +import subprocess, sys
  5 +
  6 +if len(sys.argv) > 1:
  7 + infile = int(sys.argv[1])
  8 +
  9 +basefile = infile + "-base"
  10 +normfile = infile + "-norm"
  11 +
  12 +runcommand = "hsiproc " + infile + basefile + " --baseline baseline.txt"
  13 +subprocess.call(runcommand, shell=True)
0 14 \ No newline at end of file
... ...
stim/biomodels/cellset.h
... ... @@ -117,7 +117,7 @@ public:
117 117 }
118 118  
119 119 /// Return the maximum value of a field in this cell set
120   - double max(std::string field){
  120 + double maximum(std::string field){
121 121 size_t idx = fields[field]; //get the field index
122 122 size_t ncells = cells.size(); //get the total number of cells
123 123 double maxval, val; //stores the current and maximum values
... ... @@ -130,7 +130,7 @@ public:
130 130 }
131 131  
132 132 /// Return the minimum value of a field in this cell set
133   - double min(std::string field){
  133 + double minimum(std::string field){
134 134 size_t idx = fields[field]; //get the field index
135 135 size_t ncells = cells.size(); //get the total number of cells
136 136 double minval, val; //stores the current and maximum values
... ...
stim/biomodels/network.h
... ... @@ -11,8 +11,8 @@
11 11 #include <stim/math/vec3.h>
12 12 #include <stim/visualization/obj.h>
13 13 #include <stim/visualization/cylinder.h>
14   -#include <ANN/ANN.h>
15   -#include <boost/tuple/tuple.hpp>
  14 +#include <stim/structures/kdtree.cuh>
  15 +#include <stim/cuda/cudatools/timer.h>
16 16  
17 17  
18 18 namespace stim{
... ... @@ -35,7 +35,7 @@ class network{
35 35 // default constructor
36 36 edge() : cylinder<T>()
37 37 {
38   - v[1] = -1; v[0] = -1;
  38 + v[1] = (unsigned)(-1); v[0] = (unsigned)(-1);
39 39 }
40 40 /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor
41 41  
... ... @@ -57,7 +57,7 @@ class network{
57 57 /// Output the edge information as a string
58 58 std::string str(){
59 59 std::stringstream ss;
60   - ss<<"("<<cylinder<T>::size()<<")\tl = "<<this.length()<<"\t"<<v[0]<<"----"<<v[1];
  60 + ss<<"("<<cylinder<T>::size()<<")\tl = "<<this->length()<<"\t"<<v[0]<<"----"<<v[1];
61 61 return ss.str();
62 62 }
63 63  
... ... @@ -125,7 +125,9 @@ public:
125 125 return V.size();
126 126 }
127 127  
128   - std::vector<vertex> operator*(T s){
  128 + //scale the network by some constant value
  129 + // I don't think these work??????
  130 + /*std::vector<vertex> operator*(T s){
129 131 for (unsigned i=0; i< vertices; i ++ ){
130 132 V[i] = V[i] * s;
131 133 }
... ... @@ -139,10 +141,9 @@ public:
139 141 }
140 142 }
141 143 return V;
142   - }
  144 + }*/
143 145  
144 146 // Returns an average of branching index in the network
145   -
146 147 double BranchingIndex(){
147 148 double B=0;
148 149 for(unsigned v=0; v < V.size(); v ++){
... ... @@ -154,7 +155,6 @@ public:
154 155 }
155 156  
156 157 // Returns number of branch points in the network
157   -
158 158 unsigned int BranchP(){
159 159 unsigned int B=0;
160 160 unsigned int c;
... ... @@ -168,7 +168,6 @@ public:
168 168 }
169 169  
170 170 // Returns number of end points (tips) in the network
171   -
172 171 unsigned int EndP(){
173 172 unsigned int B=0;
174 173 unsigned int c;
... ... @@ -202,10 +201,11 @@ public:
202 201 // return s;
203 202 //}
204 203  
205   -
  204 + //Calculate Metrics---------------------------------------------------
206 205 // Returns an average of fiber/edge lengths in the network
207 206 double Lengths(){
208   - stim::vec<T> L;double sumLength = 0;
  207 + stim::vec<T> L;
  208 + double sumLength = 0;
209 209 for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network
210 210 L.push_back(E[e].length()); //append the edge length
211 211 sumLength = sumLength + E[e].length();
... ... @@ -269,8 +269,10 @@ public:
269 269 double avg = sumFractDim / E.size();
270 270 return avg;
271 271 }
272   - stim::cylinder<T> get_cylinder(unsigned f){
273   - return E[f]; //return the specified edge (casting it to a fiber)
  272 +
  273 + //returns a cylinder representing a given fiber (based on edge index)
  274 + stim::cylinder<T> get_cylinder(unsigned e){
  275 + return E[e]; //return the specified edge (casting it to a fiber)
274 276 }
275 277  
276 278 //load a network from an OBJ file
... ... @@ -385,11 +387,27 @@ public:
385 387 return n;
386 388 }
387 389  
  390 + //Copy the point cloud representing the centerline for the network into an array
  391 + void centerline_cloud(T* dst) {
  392 + size_t p; //stores the current edge point
  393 + size_t P; //stores the number of points in an edge
  394 + size_t i = 0; //index into the output array of points
  395 + for (size_t e = 0; e < E.size(); e++) { //for each edge in the network
  396 + P = E[e].size(); //get the number of points in this edge
  397 + for (p = 0; p < P; p++) {
  398 + dst[i * 3 + 0] = E[e][p][0];
  399 + dst[i * 3 + 1] = E[e][p][1];
  400 + dst[i * 3 + 2] = E[e][p][2];
  401 + i++;
  402 + }
  403 + }
  404 + }
  405 +
388 406 // gaussian function
389 407 float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25
390 408  
391   - // stim 3d vector to annpoint of 3 dimensions
392   - void stim2ann(ANNpoint &a, stim::vec3<T> b){
  409 + // convert vec3 to array
  410 + void stim2array(float *a, stim::vec3<T> b){
393 411 a[0] = b[0];
394 412 a[1] = b[1];
395 413 a[2] = b[2];
... ... @@ -413,57 +431,81 @@ public:
413 431  
414 432 /// @param A is the network to compare to - the field is generated for A
415 433 /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison
416   - stim::network<T> compare(stim::network<T> A, float sigma){
  434 + stim::network<T> compare(stim::network<T> A, float sigma, int device){
417 435  
418   - stim::network<T> R; //generate a network storing the result of the comparison
419   - R = (*this); //initialize the result with the current network
  436 + stim::network<T> R; //generate a network storing the result of the comparison
  437 + R = (*this); //initialize the result with the current network
420 438  
421   - //generate a KD-tree for network A
422   - float metric = 0.0; // initialize metric to be returned after comparing the networks
423   - ANNkd_tree* kdt; // initialize a pointer to a kd tree
424   - double **c; // centerline (array of double pointers) - points on kdtree must be double
425   - unsigned int n_data = A.total_points(); // set the number of points
426   - c = (double**) malloc(sizeof(double*) * n_data); // allocate the array pointer
427   - for(unsigned int i = 0; i < n_data; i++) // allocate space for each point of 3 dimensions
428   - c[i] = (double*) malloc(sizeof(double) * 3);
  439 + T *c; // centerline (array of double pointers) - points on kdtree must be double
  440 + size_t n_data = A.total_points(); // set the number of points
  441 + c = (T*) malloc(sizeof(T) * n_data * 3); //allocate an array to store all points in the data set
429 442  
430 443 unsigned t = 0;
431   - for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network
432   - for(unsigned p = 0; p < A.E[e].size(); p++){ //for each point in the edge
  444 + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network
  445 + for(unsigned p = 0; p < A.E[e].size(); p++){ //for each point in the edge
433 446 for(unsigned d = 0; d < 3; d++){ //for each coordinate
434 447  
435   - c[t][d] = A.E[e][p][d];
  448 + c[t * 3 + d] = A.E[e][p][d]; //copy the point into the array c
436 449 }
437 450 t++;
438 451 }
439 452 }
440 453  
  454 + //generate a KD-tree for network A
  455 + //float metric = 0.0; // initialize metric to be returned after comparing the network
  456 + size_t MaxTreeLevels = 3; // max tree level
  457 +
  458 +#ifdef __CUDACC__
  459 + cudaSetDevice(device);
  460 + stim::cuda_kdtree<T, 3> kdt; // initialize a pointer to a kd tree
  461 +
441 462 //compare each point in the current network to the field produced by A
442   - ANNpointArray pts = (ANNpointArray)c; // create an array of data points of type double
443   - kdt = new ANNkd_tree(pts, n_data, 3); // build a KD tree using the annpointarray
444   - double eps = 0; // error bound
445   - ANNdistArray dists = new ANNdist[1]; // near neighbor distances
446   - ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices
  463 + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree
  464 + T *dists = new T[1]; // near neighbor distances
  465 + size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices
447 466  
448 467 stim::vec3<T> p0, p1;
449   - float m1;
450   - float M = 0; //stores the total metric value
451   - float L = 0; //stores the total network length
452   - ANNpoint queryPt = annAllocPt(3);
  468 + T m1;
  469 + //float M = 0; //stores the total metric value
  470 + //float L = 0; //stores the total network length
  471 + T* queryPt = new T[3];
453 472 for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
454 473 R.E[e].add_mag(0); //add a new magnitude for the metric
455 474  
456 475 for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge
457 476  
458 477 p1 = R.E[e][p]; //get the next point in the edge
459   - stim2ann(queryPt, p1);
460   - kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network
461   - m1 = 1.0f - gaussianFunction((float)dists[0], sigma); //calculate the metric value based on the distance
  478 + stim2array(queryPt, p1);
  479 + kdt.search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network
  480 +
  481 + m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance
462 482 R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment
463 483  
464 484 }
465 485 }
  486 +#else
  487 + stim::cpu_kdtree<T, 3> kdt;
  488 + kdt.create(c, n_data, MaxTreeLevels);
  489 + T *dists = new T[1]; // near neighbor distances
  490 + size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices
  491 +
  492 + stim::vec3<T> p0, p1;
  493 + T m1;
  494 + T* queryPt = new T[3];
  495 + for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A
  496 + R.E[e].add_mag(0); //add a new magnitude for the metric
  497 +
  498 + for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge
466 499  
  500 + p1 = R.E[e][p]; //get the next point in the edge
  501 + stim2array(queryPt, p1);
  502 + kdt.cpu_search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network
  503 +
  504 + m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance
  505 + R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment
  506 + }
  507 + }
  508 +#endif
467 509 return R; //return the resulting network
468 510 }
469 511  
... ... @@ -487,7 +529,7 @@ public:
487 529 void load_txt(std::string filename)
488 530 {
489 531 std::vector <std::string> file_contents;
490   - std::ifstream file(filename);
  532 + std::ifstream file(filename.c_str());
491 533 std::string line;
492 534 std::vector<unsigned> id2vert; //this list stores the vertex ID associated with each network vertex
493 535 //for each line in the text file, store them as strings in file_contents
... ... @@ -538,7 +580,7 @@ public:
538 580 for(unsigned int d = 0; d < 3; d++){
539 581 ss<<p[i][d];
540 582 }
541   - ss < "\n";
  583 + ss << "\n";
542 584 }
543 585 return ss.str();
544 586 }
... ... @@ -552,8 +594,8 @@ public:
552 594 void
553 595 to_txt(std::string filename)
554 596 {
555   - std::ofstream ofs(filename, std::ofstream::out | std::ofstream::app);
556   - int num;
  597 + std::ofstream ofs(filename.c_str(), std::ofstream::out | std::ofstream::app);
  598 + //int num;
557 599 ofs << (E.size()).str() << "\n";
558 600 for(unsigned int i = 0; i < E.size(); i++)
559 601 {
... ... @@ -566,7 +608,8 @@ public:
566 608 {
567 609 std::string str;
568 610 str = V[i].str();
569   - removeCharsFromString(str, "[],");
  611 + char temp[4] = "[],";
  612 + removeCharsFromString(str, temp);
570 613 ofs << str << "\n";
571 614 }
572 615 ofs.close();
... ...
stim/biomodels/network_dep.h
... ... @@ -4,7 +4,7 @@
4 4 #include <stim/math/vector.h>
5 5 #include <stim/visualization/obj.h>
6 6 #include <list>
7   -#include <ANN/ANN.h>
  7 +//#include <ANN/ANN.h>
8 8  
9 9 namespace stim{
10 10  
... ...
stim/cuda/cudatools/error.h
  1 +#ifndef STIM_CUDA_ERROR_H
  2 +#define STIM_CUDA_ERROR_H
  3 +
1 4 #include <stdio.h>
2 5 #include <iostream>
3 6 using namespace std;
4 7 #include "cuda_runtime.h"
5 8 #include "device_launch_parameters.h"
6 9 #include "cufft.h"
7   -
8   -#ifndef CUDA_HANDLE_ERROR_H
9   -#define CUDA_HANDLE_ERROR_H
  10 +#include "cublas_v2.h"
10 11  
11 12 //handle error macro
12   -static void HandleError( cudaError_t err, const char *file, int line ) {
  13 +static void cuHandleError( cudaError_t err, const char *file, int line ) {
13 14 if (err != cudaSuccess) {
14   - //FILE* outfile = fopen("cudaErrorLog.txt", "w");
15   - //fprintf(outfile, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
16   - //fclose(outfile);
17 15 printf("%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
18   - //exit( EXIT_FAILURE );
19 16  
20 17 }
21 18 }
22   -#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
  19 +#define HANDLE_ERROR( err ) (cuHandleError( err, __FILE__, __LINE__ ))
23 20  
24   -static void CufftError( cufftResult err )
  21 +static void cufftHandleError( cufftResult err, const char*file, int line )
25 22 {
26 23 if (err != CUFFT_SUCCESS)
27 24 {
... ... @@ -42,7 +39,29 @@ static void CufftError( cufftResult err )
42 39  
43 40 }
44 41 }
  42 +#define CUFFT_HANDLE_ERROR( err ) (cufftHandleError( err, __FILE__, __LINE__ ))
45 43  
  44 +static void cublasHandleError( cublasStatus_t err, const char*file, int line ){
  45 + if(err != CUBLAS_STATUS_SUCCESS){
  46 + if(err == CUBLAS_STATUS_NOT_INITIALIZED)
  47 + std::cout<<"CUBLAS_STATUS_NOT_INITIALIZED" <<" in file "<<file<<" line "<<std::endl;
  48 + else if(err == CUBLAS_STATUS_ALLOC_FAILED)
  49 + std::cout<<"CUBLAS_STATUS_ALLOC_FAILED" <<" in file "<<file<<" line "<<std::endl;
  50 + else if(err == CUBLAS_STATUS_INVALID_VALUE)
  51 + std::cout<<"CUBLAS_STATUS_INVALID_VALUE" <<" in file "<<file<<" line "<<std::endl;
  52 + else if(err == CUBLAS_STATUS_ARCH_MISMATCH)
  53 + std::cout<<"CUBLAS_STATUS_ARCH_MISMATCH" <<" in file "<<file<<" line "<<std::endl;
  54 + else if(err == CUBLAS_STATUS_MAPPING_ERROR)
  55 + std::cout<<"CUBLAS_STATUS_MAPPING_ERROR" <<" in file "<<file<<" line "<<std::endl;
  56 + else if(err == CUBLAS_STATUS_EXECUTION_FAILED)
  57 + std::cout<<"CUBLAS_STATUS_EXECUTION_FAILED" <<" in file "<<file<<" line "<<std::endl;
  58 + else if(err == CUBLAS_STATUS_INTERNAL_ERROR)
  59 + std::cout<<"CUBLAS_STATUS_INTERNAL_ERROR" <<" in file "<<file<<" line "<<std::endl;
  60 + else
  61 + std::cout<<"Unknown error"<<" in file "<<file<<" line "<<std::endl;
  62 + }
  63 +}
  64 +#define CUBLAS_HANDLE_ERROR( err ) (cublasHandleError( err, __FILE__, __LINE__ ))
46 65  
47 66  
48 67 #endif
... ...
stim/envi/agilent_binary.h
... ... @@ -4,13 +4,15 @@
4 4  
5 5 #include <string>
6 6 #include <fstream>
  7 +#include <complex>
7 8  
8 9 //CUDA
9   -#ifdef CUDA_FOUND
10   - #include <cuda_runtime.h>
11   - #include "cufft.h"
12   - #include <stim/cuda/cudatools/error.h>
13   -#endif
  10 +//#ifdef CUDA_FOUND
  11 +#include <cuda_runtime.h>
  12 +#include "cufft.h"
  13 +#include <stim/cuda/cudatools/error.h>
  14 +#include <stim/envi/envi_header.h>
  15 +//#endif
14 16  
15 17 namespace stim{
16 18  
... ... @@ -19,10 +21,10 @@ class agilent_binary{
19 21  
20 22 protected:
21 23 std::string fname;
22   - T* ptr;
23   - size_t R[3];
24   - static const size_t header = 1020;
25   - double Z[2];
  24 + T* ptr; //pointer to the image data
  25 + size_t R[3]; //size of the binary image in X, Y, and Z
  26 + static const size_t header = 1020; //header size
  27 + double Z[2]; //range of z values (position or wavelength)
26 28  
27 29 public:
28 30 size_t size(){
... ... @@ -42,6 +44,10 @@ public:
42 44 alloc();
43 45 }
44 46  
  47 + size_t dim(size_t i){ //returns the image size along dimension i (R stores the X, Y, and Z sizes in that order)
  48 + return R[i]; //NOTE(review): i is not range-checked; R has three entries, so callers must pass 0, 1, or 2
  49 + }
  50 +
45 51 /// Create a deep copy of an agilent_binary object
46 52 void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){
47 53 dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory
... ... @@ -136,6 +142,42 @@ public:
136 142 return header;
137 143 }
138 144  
  145 + /// Subtract the mean from each pixel. Generally used for centering an interferogram.
  146 + void meancenter(){
  147 + size_t Z = R[2]; //store the number of bands
  148 + size_t XY = R[0] * R[1]; //store the number of pixels in the image
  149 + T sum = (T)0;
  150 + T mean;
  151 + for(size_t xy = 0; xy < XY; xy++){ //for each pixel
  152 + sum = 0;
  153 + for(size_t z = 0; z < Z; z++){ //for each band
  154 + sum += ptr[ z * XY + xy ]; //add the band value to a running sum
  155 + }
  156 + mean = sum / (T)Z; //calculate the pixel mean
  157 + for(size_t z = 0; z < Z; z++){
  158 + ptr[ z * XY + xy ] -= mean; //subtract the mean from each band
  159 + }
  160 + }
  161 + }
  162 +
  163 + /// Appends n zero-filled bands to the in-memory image (the Z dimension grows by n); nothing is written to disk here
  164 + void zeropad(size_t n){
  165 + size_t newZ = R[2] + n; //band count after padding
  166 + T* temp = (T*) calloc(R[0] * R[1] * newZ, sizeof(T)); //allocate zero-initialized space for the new image; NOTE(review): the result is not checked for NULL
  167 + memcpy(temp, ptr, size() * sizeof(T)); //copy the existing size() elements into the front of the new buffer
  168 +
  169 + free(ptr); //free the old data
  170 + ptr = temp; //swap in the new data
  171 + R[2] = newZ; //record the padded band count as the new z-dimension
  172 + }
  173 +
  174 + //pads to the nearest power-of-two
  175 + void zeropad(){
  176 + size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two
  177 + size_t n = newZ - R[2]; //calculate the number of bands to add
  178 + zeropad(n); //add the padding
  179 + }
  180 +
139 181 /// Calculate the absorbance spectrum from the transmission spectrum given a background
140 182 void absorbance(stim::agilent_binary<T>* background){
141 183 size_t N = size(); //calculate the number of values to be ratioed
... ... @@ -147,7 +189,7 @@ public:
147 189 ptr[i] = -log10(ptr[i] / background->ptr[i]);
148 190 }
149 191  
150   -#ifdef CUDA_FOUND
  192 +//#ifdef CUDA_FOUND
151 193 /// Perform an FFT and return a binary file with bands in the specified range
152 194 agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){
153 195 auto total_start = std::chrono::high_resolution_clock::now();
... ... @@ -234,7 +276,22 @@ public:
234 276  
235 277 return result;
236 278 }
237   -#endif
  279 +
  280 + //saves the binary as an ENVI file with a BIP interleave format
  281 + int bip(T* bip_ptr){
  282 + //std::ofstream out(outfile.c_str(), std::ios::binary); //create a binary file stream for output
  283 + size_t XY = R[0] * R[1];
  284 + size_t B = R[2];
  285 + size_t b;
  286 +
  287 + for(size_t xy = 0; xy < XY; xy++){
  288 + for(b = 0; b < B; b++){
  289 + bip_ptr[xy * B + b] = ptr[b * XY + xy];
  290 + }
  291 + }
  292 + return 0;
  293 + }
  294 +//#endif
238 295  
239 296 };
240 297  
... ...
stim/envi/bil.h
... ... @@ -4,6 +4,7 @@
4 4 #include "../envi/envi_header.h"
5 5 #include "../envi/hsi.h"
6 6 #include "../math/fd_coefficients.h"
  7 +#include <stim/cuda/cudatools/error.h>
7 8 #include <cstring>
8 9 #include <utility>
9 10 #include <deque>
... ... @@ -118,7 +119,7 @@ public:
118 119 page++;
119 120 //if wavelength is larger than the last wavelength in header file
120 121 if (page == Z()) {
121   - band_index(p, Z()-1);
  122 + band_index(p, Z()-1, PROGRESS);
122 123 return true;
123 124 }
124 125 }
... ... @@ -224,10 +225,44 @@ public:
224 225 }
225 226  
226 227 //given a Y ,return a XZ slice
227   - bool read_plane_y(T * p, unsigned long long y){
  228 + bool read_plane_xz(T * p, size_t y){
228 229 return binary<T>::read_plane_2(p, y);
229 230 }
230 231  
  232 + //given a Y, return ZX slice (transposed such that the spectrum is the leading dimension)
  233 + int read_plane_zx(T* p, size_t y){
  234 + T* temp = (T*) malloc(X() * Z() * sizeof(T)); //allocate space to store the temporary xz plane
  235 + binary<T>::read_plane_2(temp, y); //load the plane from disk
  236 + size_t z, x;
  237 + for(z = 0; z < Z(); z++){
  238 + for(x = 0; x <= z; x++){
  239 + p[x * Z() + z] = temp[z * X() + x]; //copy to the destination frame
  240 + }
  241 + }
  242 + }
  243 +
  244 + //load a frame y into a pre-allocated double-precision array
  245 + int read_plane_xzd(double* f, size_t y){
  246 + size_t XB = X() * Z();
  247 + T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision
  248 + if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1
  249 + for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double
  250 + return 0;
  251 + }
  252 +
  253 + //given a Y, return ZX slice (transposed such that the spectrum is the leading dimension)
  254 + int read_plane_zxd(double* p, size_t y){
  255 + T* temp = (T*) malloc(X() * Z() * sizeof(T)); //allocate space to store the temporary xz plane
  256 + binary<T>::read_plane_2(temp, y); //load the plane from disk
  257 + size_t z, x;
  258 + for(z = 0; z < Z(); z++){
  259 + for(x = 0; x < X(); x++){
  260 + p[x * Z() + z] = (double)temp[z * X() + x]; //copy to the destination frame
  261 + }
  262 + }
  263 + return 0;
  264 + }
  265 +
231 266  
232 267 /// Perform baseline correction given a list of baseline points and stores the result in a new BSQ file.
233 268  
... ... @@ -268,7 +303,7 @@ public:
268 303 for (unsigned long long k =0; k < Y(); k++)
269 304 {
270 305 //get the current y slice
271   - read_plane_y(c, k);
  306 + read_plane_xz(c, k);
272 307  
273 308 //initialize lownum, highnum, low, high
274 309 ai = w[0];
... ... @@ -369,7 +404,7 @@ public:
369 404  
370 405 for(unsigned long long j = 0; j < Y(); j++)
371 406 {
372   - read_plane_y(c, j);
  407 + read_plane_xz(c, j);
373 408 for(unsigned long long i = 0; i < B; i++)
374 409 {
375 410 for(unsigned long long m = 0; m < X(); m++)
... ... @@ -469,7 +504,7 @@ public:
469 504  
470 505 for ( unsigned long long i = 0; i < Y(); i++)
471 506 {
472   - read_plane_y(p, i);
  507 + read_plane_xz(p, i);
473 508 for ( unsigned long long k = 0; k < Z(); k++)
474 509 {
475 510 unsigned long long ks = k * X();
... ... @@ -863,7 +898,7 @@ public:
863 898  
864 899 for (unsigned long long i = 0; i < Y(); i++) //for each value in Y() (BIP should be X)
865 900 {
866   - read_plane_y(temp, i); //retrieve an ZX slice, stored in temp
  901 + read_plane_xz(temp, i); //retrieve an ZX slice, stored in temp
867 902 for ( unsigned long long j = 0; j < Z(); j++) //for each Z() (Y)
868 903 {
869 904 for (unsigned long long k = 0; k < X(); k++) //for each band
... ... @@ -933,7 +968,7 @@ public:
933 968 //for each slice along the y axis
934 969 for (unsigned long long y = 0; y < Y(); y++) //Select a page by choosing Y coordinate, Y()
935 970 {
936   - read_plane_y(slice, y); //retrieve an ZX page, store in "slice"
  971 + read_plane_xz(slice, y); //retrieve an ZX page, store in "slice"
937 972  
938 973 //for each sample along X
939 974 for (unsigned long long x = 0; x < X(); x++) //Select a pixel by choosing X coordinate in the page, X()
... ... @@ -992,43 +1027,136 @@ public:
992 1027  
993 1028 /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum
994 1029 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
995   - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){
  1030 + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){
996 1031 unsigned long long XZ = X() * Z();
997 1032 unsigned long long XY = X() * Y();
998 1033 T* temp = (T*)malloc(sizeof(T) * XZ);
999   - for (unsigned long long j = 0; j < Z(); j++){
1000   - p[j] = 0;
1001   - }
  1034 + memset(m, 0, Z() * sizeof(double)); //initialize the mean to zero
  1035 + double* e_x2 = (double*)malloc(Z() * sizeof(double)); //allocate space for E[x^2]
  1036 + memset(e_x2, 0, Z() * sizeof(double)); //initialize E[x^2] to zero
1002 1037 //calculate vaild number in a band
1003   - unsigned long long count = 0;
1004   - for (unsigned long long j = 0; j < XY; j++){
1005   - if (mask == NULL || mask[j] != 0){
1006   - count++;
1007   - }
1008   - }
  1038 + size_t count = nnz(mask); //count the number of pixels in the mask
  1039 +
  1040 + double x; //create a register to store the pixel value
1009 1041 for (unsigned long long k = 0; k < Y(); k++){
1010   - read_plane_y(temp, k);
  1042 + read_plane_xz(temp, k);
1011 1043 unsigned long long kx = k * X();
1012 1044 for (unsigned long long i = 0; i < X(); i++){
1013 1045 if (mask == NULL || mask[kx + i] != 0){
1014 1046 for (unsigned long long j = 0; j < Z(); j++){
1015   - p[j] += temp[j * X() + i] / (double)count;
  1047 + x = temp[j * X() + i];
  1048 + m[j] += x / (double)count;
  1049 + e_x2[j] += x*x / (double)count;
1016 1050 }
1017 1051 }
1018 1052 }
1019 1053 if(PROGRESS) progress = (double)(k+1) / Y() * 100;
1020 1054 }
  1055 +
  1056 + for(size_t i = 0; i < Z(); i++) //calculate the standard deviation
  1057 + std[i] = sqrt(e_x2[i] - m[i] * m[i]);
  1058 +
1021 1059 free(temp);
1022 1060 return true;
1023 1061 }
1024 1062  
  1063 + int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
  1064 + cublasStatus_t stat;
  1065 + cublasHandle_t handle;
  1066 +
  1067 + progress = 0; //initialize the progress to zero (0)
  1068 + size_t XY = X() * Y(); //calculate the number of elements in a band image
  1069 + size_t XB = X() * Z();
  1070 + size_t B = Z(); //calculate the number of spectral elements
  1071 +
  1072 + double* F = (double*)malloc(sizeof(double) * B * X()); //allocate space for the frame that will be pulled from the file
  1073 + double* F_dev;
  1074 + HANDLE_ERROR(cudaMalloc(&F_dev, X() * B * sizeof(double))); //allocate space for the frame on the GPU
  1075 + double* s_dev; //declare a device pointer that will store the spectrum on the GPU
  1076 + double* A_dev; //declare a device pointer that will store the covariance matrix on the GPU
  1077 + double* avg_dev; //declare a device pointer that will store the average spectrum
  1078 + HANDLE_ERROR(cudaMalloc(&s_dev, B * sizeof(double))); //allocate space on the CUDA device for a spectrum
  1079 + HANDLE_ERROR(cudaMalloc(&A_dev, B * B * sizeof(double))); //allocate space on the CUDA device for the covariance matrix
  1080 + HANDLE_ERROR(cudaMemset(A_dev, 0, B * B * sizeof(double))); //initialize the covariance matrix to zero (0)
  1081 + HANDLE_ERROR(cudaMalloc(&avg_dev, XB * sizeof(double))); //allocate space on the CUDA device for the average spectrum
  1082 + for(size_t x = 0; x < X(); x++) //make multiple copies of the average spectrum in order to build a matrix
  1083 + HANDLE_ERROR(cudaMemcpy(&avg_dev[x * B], avg, B * sizeof(double), cudaMemcpyHostToDevice));
  1084 + //stat = cublasSetVector((int)B, sizeof(double), avg, 1, avg_dev, 1); //copy the average spectrum to the CUDA device
  1085 +
  1086 + double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product)
  1087 + double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction)
  1088 +
  1089 + CUBLAS_HANDLE_ERROR(stat = cublasCreate(&handle)); //create a cuBLAS instance
  1090 + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid
  1091 +
  1092 + else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl;
  1093 + double beta = 1.0;
  1094 + size_t x, y;
  1095 + for(y = 0; y < Y(); y++){ //for each line
  1096 + read_plane_zxd(F, y); //read a frame from the file
  1097 + HANDLE_ERROR(cudaMemcpy(F_dev, F, XB * sizeof(double), cudaMemcpyHostToDevice)); //copy the frame to the GPU
  1098 + CUBLAS_HANDLE_ERROR(cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, (int)B, (int)X(), &axpy_alpha, avg_dev, (int)B, &beta, F_dev, (int)B, F_dev, (int)B));//subtract the mean spectrum
  1099 +
  1100 + for(x = 0; x < X(); x++)
  1101 + CUBLAS_HANDLE_ERROR(cublasDsyr(handle, CUBLAS_FILL_MODE_UPPER, (int)B, &ger_alpha, &F_dev[x*B], 1, A_dev, (int)B)); //perform an outer product
  1102 + if(PROGRESS) progress = (double)(y + 1) / Y() * 100;
  1103 + }
  1104 +
  1105 + cublasGetMatrix((int)B, (int)B, sizeof(double), A_dev, (int)B, co, (int)B); //copy the result from the GPU to the CPU
  1106 +
  1107 + cudaFree(A_dev); //clean up allocated device memory
  1108 + cudaFree(s_dev);
  1109 + cudaFree(avg_dev);
  1110 +
  1111 + for(unsigned long long i = 0; i < B; i++){ //copy the upper triangular portion to the lower triangular portion
  1112 + for(unsigned long long j = i+1; j < B; j++){
  1113 + co[B * i + j] = co[B * j + i];
  1114 + }
  1115 + }
  1116 +
  1117 + return 0;
  1118 +
  1119 +
  1120 +
  1121 + }
  1122 +
  1123 +
1025 1124 /// Calculate the covariance matrix for all masked pixels in the image.
1026 1125  
1027 1126 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1028 1127 /// @param avg is a pointer to memory of size B that stores the average spectrum
1029 1128 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1030   - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
  1129 + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
1031 1130 progress = 0;
  1131 +
  1132 + if(use_gpu){
  1133 + int dev_count;
  1134 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1135 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1136 + cudaDeviceProp prop;
  1137 + int best_device_id = 0; //stores the best CUDA device
  1138 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1139 + std::cout<<"CUDA devices:"<<std::endl;
  1140 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1141 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1142 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1143 + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1144 + if(cc > best_device_cc){
  1145 + best_device_cc = cc; //if this is better than the previous device, use it
  1146 + best_device_id = d;
  1147 + }
  1148 + }
  1149 +
  1150 + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
  1151 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1152 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1153 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1154 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1155 + } //otherwise continue using the CPU
  1156 +
  1157 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1158 + }
  1159 +
1032 1160 //memory allocation
1033 1161 unsigned long long xy = X() * Y();
1034 1162 unsigned long long B = Z();
... ... @@ -1325,7 +1453,7 @@ public:
1325 1453 c = (T*)malloc( L ); //allocate space for the slice
1326 1454  
1327 1455 for(unsigned long long j = 0; j < Y(); j++){ //for each line
1328   - read_plane_y(c, j); //load the line into memory
  1456 + read_plane_xz(c, j); //load the line into memory
1329 1457 for(unsigned long long i = 0; i < B; i++){ //for each band
1330 1458 for(unsigned long long m = 0; m < X(); m++){ //for each sample
1331 1459 if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked
... ... @@ -1355,7 +1483,7 @@ public:
1355 1483 c = (T*)malloc( L ); //allocate space for the slice
1356 1484  
1357 1485 for(unsigned long long j = 0; j < Y(); j++){ //for each line
1358   - read_plane_y(c, j); //load the line into memory
  1486 + read_plane_xz(c, j); //load the line into memory
1359 1487 for(unsigned long long i = 0; i < B; i++){ //for each band
1360 1488 for(unsigned long long m = 0; m < X(); m++){ //for each sample
1361 1489 if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked
... ...
stim/envi/bip.h
... ... @@ -5,13 +5,16 @@
5 5 #include "../envi/bil.h"
6 6 #include "../envi/hsi.h"
7 7 #include <cstring>
  8 +#include <complex>
8 9 #include <utility>
9 10  
10 11 //CUDA
11   -#ifdef CUDA_FOUND
12   - #include <cuda_runtime.h>
13   - #include "cublas_v2.h"
14   -#endif
  12 +//#ifdef CUDA_FOUND
  13 +#include <stim/cuda/cudatools/error.h>
  14 +#include <cuda_runtime.h>
  15 +#include "cublas_v2.h"
  16 +#include "cufft.h"
  17 +//#endif
15 18  
16 19 namespace stim{
17 20  
... ... @@ -257,7 +260,7 @@ public:
257 260 }
258 261  
259 262 //given a Y ,return a ZX slice
260   - bool read_plane_y(T * p, unsigned long long y){
  263 + bool read_plane_y(T * p, size_t y){
261 264 return binary<T>::read_plane_2(p, y);
262 265 }
263 266  
... ... @@ -954,39 +957,43 @@ public:
954 957  
955 958 /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum
956 959 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
957   - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){
  960 + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){
958 961 unsigned long long XY = X() * Y(); //calculate the total number of pixels in the HSI
959 962 T* temp = (T*)malloc(sizeof(T) * Z()); //allocate space for the current spectrum to be read
960   - memset(p, 0, sizeof(double) * Z()); //initialize the average spectrum to zero (0)
961   - //for (unsigned j = 0; j < Z(); j++){
962   - // p[j] = 0;
963   - //}
  963 + memset(m, 0, Z() * sizeof(double)); //set the mean spectrum to zero
  964 + double* e_x2 = (double*)malloc(Z() * sizeof(double)); //allocate space for E[x^2]
  965 + memset(e_x2, 0, Z() * sizeof(double)); //set all values for E[x^2] to zero
964 966  
965 967 unsigned long long count = nnz(mask); //calculate the number of masked pixels
966   -
  968 + double x;
967 969 for (unsigned long long i = 0; i < XY; i++){ //for each pixel in the HSI
968 970 if (mask == NULL || mask[i] != 0){ //if the pixel is masked
969 971 pixel(temp, i); //get the spectrum
970 972 for (unsigned long long j = 0; j < Z(); j++){ //for each spectral component
971   - p[j] += (double)temp[j] / (double)count; //add the weighted value to the average
  973 + x = temp[j];
  974 + m[j] += x / (double)count; //add the weighted value to the average
  975 + e_x2[j] += x*x / (double)count;
972 976 }
973 977 }
974 978 if(PROGRESS) progress = (double)(i+1) / XY * 100; //increment the progress
975 979 }
976 980  
  981 + //calculate the standard deviation
  982 + for(size_t i = 0; i < Z(); i++)
  983 + std[i] = sqrt(e_x2[i] - m[i] * m[i]);
  984 +
977 985 free(temp);
978 986 return true;
979 987 }
980   -#ifdef CUDA_FOUND
  988 +//#ifdef CUDA_FOUND
981 989 /// Calculate the covariance matrix for masked pixels using cuBLAS
982 990 /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra
983   - bool co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
  991 + int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
984 992  
985 993 cudaError_t cudaStat;
986 994 cublasStatus_t stat;
987 995 cublasHandle_t handle;
988 996  
989   - progress = 0; //initialize the progress to zero (0)
990 997 unsigned long long XY = X() * Y(); //calculate the number of elements in a band image
991 998 unsigned long long B = Z(); //calculate the number of spectral elements
992 999  
... ... @@ -1004,10 +1011,9 @@ public:
1004 1011 double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction)
1005 1012  
1006 1013 stat = cublasCreate(&handle); //create a cuBLAS instance
1007   - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid
1008   - printf ("CUBLAS initialization failed\n");
1009   - return EXIT_FAILURE;
1010   - }
  1014 + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid
  1015 +
  1016 + else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl;
1011 1017 for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel
1012 1018 if (mask == NULL || mask[xy] != 0){
1013 1019 pixeld(s, xy); //retreive the spectrum at the current xy pixel location
... ... @@ -1031,26 +1037,45 @@ public:
1031 1037 }
1032 1038 }
1033 1039  
1034   - return true;
  1040 + return 0;
1035 1041 }
1036   -#endif
  1042 +//#endif
1037 1043  
1038 1044 /// Calculate the covariance matrix for all masked pixels in the image with 64-bit floating point precision.
1039 1045  
1040 1046 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1041 1047 /// @param avg is a pointer to memory of size B that stores the average spectrum
1042 1048 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1043   - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
1044   -
1045   -#ifdef CUDA_FOUND
1046   - int dev_count;
1047   - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
1048   - cudaDeviceProp prop;
1049   - cudaGetDeviceProperties(&prop, 0); //get the property of the first device
1050   - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
1051   - return co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1052   -#endif
  1049 + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
1053 1050 progress = 0;
  1051 +
  1052 + if(use_gpu){
  1053 + int dev_count;
  1054 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1055 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1056 + cudaDeviceProp prop;
  1057 + int best_device_id = 0; //stores the best CUDA device
  1058 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1059 + std::cout<<"CUDA devices----"<<std::endl;
  1060 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1061 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1062 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1063 + std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information
  1064 + if(cc > best_device_cc){
  1065 + best_device_cc = cc; //if this is better than the previous device, use it
  1066 + best_device_id = d;
  1067 + }
  1068 + }
  1069 +
  1070 + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
  1071 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1072 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1073 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1074 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1075 + } //otherwise continue using the CPU
  1076 +
  1077 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1078 + }
1054 1079 //memory allocation
1055 1080 unsigned long long XY = X() * Y();
1056 1081 unsigned long long B = Z();
... ... @@ -1092,10 +1117,10 @@ public:
1092 1117 }
1093 1118  
1094 1119  
1095   -#ifdef CUDA_FOUND
  1120 +//#ifdef CUDA_FOUND
1096 1121 /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS
1097 1122 /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra
1098   - bool coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
  1123 + int coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
1099 1124  
1100 1125 cudaError_t cudaStat;
1101 1126 cublasStatus_t stat;
... ... @@ -1123,11 +1148,9 @@ public:
1123 1148 double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product)
1124 1149 double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction)
1125 1150  
1126   - stat = cublasCreate(&handle); //create a cuBLAS instance
1127   - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid
1128   - printf ("CUBLAS initialization failed\n");
1129   - return EXIT_FAILURE;
1130   - }
  1151 + CUBLAS_HANDLE_ERROR(cublasCreate(&handle)); //create a cuBLAS instance
  1152 + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid
  1153 +
1131 1154 for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel
1132 1155 if (mask == NULL || mask[xy] != 0){
1133 1156 pixeld(s, xy); //retreive the spectrum at the current xy pixel location
... ... @@ -1158,27 +1181,44 @@ public:
1158 1181 }
1159 1182 }
1160 1183  
1161   - return true;
  1184 + return 0;
1162 1185 }
1163   -#endif
  1186 +//#endif
1164 1187  
1165 1188 /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision.
1166 1189  
1167 1190 /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1168 1191 /// @param avg is a pointer to memory of size B that stores the average spectrum
1169 1192 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1170   - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
1171   -
1172   -#ifdef CUDA_FOUND
1173   - int dev_count;
1174   - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
1175   - cudaDeviceProp prop;
1176   - cudaGetDeviceProperties(&prop, 0); //get the property of the first device
1177   - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
1178   - return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1179   -#endif
1180   -
1181   -
  1193 + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
  1194 +
  1195 + if(use_gpu){
  1196 + int dev_count;
  1197 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1198 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1199 + cudaDeviceProp prop;
  1200 + int best_device_id = 0; //stores the best CUDA device
  1201 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1202 + std::cout<<"CUDA devices:"<<std::endl;
  1203 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1204 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1205 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1206 + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1207 + if(cc > best_device_cc){
  1208 + best_device_cc = cc; //if this is better than the previous device, use it
  1209 + best_device_id = d;
  1210 + }
  1211 + }
  1212 +
  1213 + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
  1214 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1215 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1216 + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1217 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1218 + } //otherwise continue using the CPU
  1219 +
  1220 + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
  1221 + }
1182 1222  
1183 1223 progress = 0;
1184 1224 //memory allocation
... ... @@ -1443,7 +1483,7 @@ public:
1443 1483 unsigned long long jump_sample = ( (Z() - b1) + b0 ) * sizeof(T);
1444 1484  
1445 1485 //distance between sample spectra in adjacent lines
1446   - unsigned long long jump_line = (X() - x1) * Z() * sizeof(T);
  1486 + unsigned long long jump_line = ( X() - x1 + x0 ) * Z() * sizeof(T);
1447 1487  
1448 1488  
1449 1489 //unsigned long long sp = y0 * X() + x0; //start pixel
... ... @@ -1682,7 +1722,117 @@ public:
1682 1722 return true;
1683 1723 }
1684 1724  
  1725 + int fft(std::string outname, size_t bandmin, size_t bandmax, size_t samples = 0, T* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int device = 0){
  1726 + if(device == -1){
  1727 + std::cout<<"ERROR: GPU required for FFT (uses cuFFT)."<<std::endl;
  1728 + exit(1);
  1729 + }
  1730 + if(samples == 0) samples = Z(); //if samples are specified, use all of them
  1731 + if(samples > Z()){
  1732 + std::cout<<"ERROR: stim::envi doesn't support FFT padding just yet."<<std::endl;
  1733 + exit(1);
  1734 + }
  1735 + int nd; //stores the number of CUDA devices
  1736 + HANDLE_ERROR(cudaGetDeviceCount(&nd)); //get the number of CUDA devices
  1737 + if(device >= nd){ //test for the existence of the requested device
  1738 + std::cout<<"ERROR: requested CUDA device for stim::envi::fft() doesn't exist"<<std::endl;
  1739 + exit(1);
  1740 + }
  1741 + HANDLE_ERROR(cudaSetDevice(device)); //set the CUDA device
  1742 + cudaDeviceProp prop;
  1743 + HANDLE_ERROR(cudaGetDeviceProperties(&prop, device)); //get the CUDA device properties
  1744 +
  1745 + size_t B = Z();
  1746 + size_t S = samples;
  1747 + size_t fft_size = S * sizeof(T); //number of bytes for each FFT
  1748 + size_t cuda_bytes = prop.totalGlobalMem; //get the number of bytes of global memory available
  1749 + size_t cuda_use = (size_t)floor(cuda_bytes * 0.2); //only use 80%
  1750 + size_t nS = cuda_use / fft_size; //calculate the number of spectra that can be loaded onto the GPU as a single batch
  1751 + size_t batch_bytes = nS * fft_size; //calculate the size of a batch (in bytes)
  1752 + size_t fft_bytes = nS * (S/2 + 1) * sizeof(cufftComplex);
  1753 + T* batch = (T*) malloc(batch_bytes); //allocate space in host memory to store a batch
  1754 + memset(batch, 0, batch_bytes);
  1755 + std::complex<T>* batch_fft = (std::complex<T>*) malloc(fft_bytes);
  1756 + T* gpu_batch; //device pointer to the batch
  1757 + HANDLE_ERROR(cudaMalloc(&gpu_batch, batch_bytes)); //allocate space on the device for the FFT batch
  1758 + cufftComplex* gpu_batch_fft; //allocate space for the FFT result
  1759 + HANDLE_ERROR(cudaMalloc(&gpu_batch_fft, fft_bytes));
  1760 + int N[1]; //create an array with the interferogram size (required for cuFFT input)
  1761 + N[0] = (int)S; //set the only array value to the interferogram size
  1762 +
  1763 + //if a background is provided for a ratio
  1764 + std::complex<T>* ratio_fft = NULL; //create a pointer for the FFT of the ratio image (if it exists)
  1765 + if(ratio){
  1766 + size_t bkg_bytes = rx * ry * S * sizeof(T); //calculate the total number of bytes in the background image
  1767 + T* bkg_copy = (T*) malloc(bkg_bytes); //allocate space to copy the background
  1768 + if(S == Z()) memcpy(bkg_copy, ratio, bkg_bytes); //if the number of samples used in processing equals the number of available samples
  1769 + else{
  1770 + for(size_t xyi = 0; xyi < rx*ry; xyi++)
  1771 + memcpy(&bkg_copy[xyi * S], &ratio[xyi * B], S * sizeof(T));
  1772 + }
  1773 + T* gpu_ratio;
  1774 + HANDLE_ERROR(cudaMalloc(&gpu_ratio, bkg_bytes));
  1775 + HANDLE_ERROR(cudaMemcpy(gpu_ratio, bkg_copy, bkg_bytes, cudaMemcpyHostToDevice));
  1776 + cufftHandle bkg_plan;
  1777 + CUFFT_HANDLE_ERROR(cufftPlanMany(&bkg_plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)(rx * ry)));
  1778 + size_t bkg_fft_bytes = rx * ry * (S / 2 + 1) * sizeof(cufftComplex);
  1779 + T* gpu_ratio_fft;
  1780 + HANDLE_ERROR(cudaMalloc(&gpu_ratio_fft, bkg_fft_bytes));
  1781 + CUFFT_HANDLE_ERROR(cufftExecR2C(bkg_plan, (cufftReal*)gpu_ratio, (cufftComplex*)gpu_ratio_fft));
  1782 + ratio_fft = (std::complex<T>*) malloc(bkg_fft_bytes);
  1783 + HANDLE_ERROR(cudaMemcpy(ratio_fft, gpu_ratio_fft, bkg_fft_bytes, cudaMemcpyDeviceToHost));
  1784 + HANDLE_ERROR(cudaFree(gpu_ratio));
  1785 + HANDLE_ERROR(cudaFree(gpu_ratio_fft));
  1786 + CUFFT_HANDLE_ERROR(cufftDestroy(bkg_plan));
  1787 + }
1685 1788  
  1789 + cufftHandle plan; //create a CUFFT plan
  1790 + CUFFT_HANDLE_ERROR(cufftPlanMany(&plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)nS));
  1791 +
  1792 + std::ofstream outfile(outname, std::ios::binary); //open a file for writing
  1793 +
  1794 + size_t XY = X() * Y(); //calculate the number of spectra
  1795 + size_t xy = 0;
  1796 + size_t bs; //stores the number of spectra in the current batch
  1797 + size_t s, b;
  1798 + size_t S_fft = S/2 + 1;
  1799 + size_t bandkeep = bandmax - bandmin + 1;
  1800 + size_t x, y;
  1801 + size_t ratio_i;
  1802 + T* temp_spec = (T*) malloc(Z() * sizeof(T)); //allocate space to hold a single pixel
  1803 + while(xy < XY){ //while there are unprocessed spectra
  1804 + bs = min(XY - xy, nS); //calculate the number of spectra to include in the batch
  1805 + for(s = 0; s < bs; s++){ //for each spectrum in the batch
  1806 + pixel(temp_spec, xy + s); //read a pixel from disk
  1807 + memcpy(&batch[s * S], temp_spec, S * sizeof(T));
  1808 + //pixel(&batch[s * S], xy + s); //read the next spectrum
  1809 + }
  1810 + HANDLE_ERROR(cudaMemcpy(gpu_batch, batch, batch_bytes, cudaMemcpyHostToDevice));
  1811 + CUFFT_HANDLE_ERROR(cufftExecR2C(plan, (cufftReal*)gpu_batch, gpu_batch_fft)); //execute the (implicitly forward) transform
  1812 + HANDLE_ERROR(cudaMemcpy(batch_fft, gpu_batch_fft, fft_bytes, cudaMemcpyDeviceToHost)); //copy the data back to the GPU
  1813 + for(s = 0; s < bs; s++){ //for each spectrum in the batch
  1814 + y = (xy + s)/X();
  1815 + x = xy + s - y * X();
  1816 + if(ratio_fft) ratio_i = (y % ry) * rx + (x % rx); //if a background is used, calculate the coordinates into it
  1817 + for(b = 0; b < S/2 + 1; b++){ //for each sample
  1818 + if(ratio_fft)
  1819 + batch[s * S + b] = -log(abs(batch_fft[s * S_fft + b]) / abs(ratio_fft[ratio_i * S_fft + b]));
  1820 + else
  1821 + batch[s * S + b] = abs(batch_fft[s * S_fft + b]); //calculate the magnitude of the spectrum
  1822 + }
  1823 + outfile.write((char*)&batch[s * S + bandmin], bandkeep * sizeof(T)); //save the resulting spectrum
  1824 + }
  1825 + xy += bs; //increment xy by the number of spectra processed
  1826 + if(PROGRESS) progress = (double)xy / (double)XY * 100;
  1827 + }
  1828 + outfile.close();
  1829 + free(ratio_fft);
  1830 + free(batch_fft);
  1831 + free(batch);
  1832 + HANDLE_ERROR(cudaFree(gpu_batch));
  1833 + HANDLE_ERROR(cudaFree(gpu_batch_fft));
  1834 + return 0;
  1835 + }
1686 1836  
1687 1837 /// Close the file.
1688 1838 bool close(){
... ...
stim/envi/bsq.h
... ... @@ -104,6 +104,7 @@ public:
104 104 //if wavelength is smaller than the first one in header file
105 105 if ( w[page] > wavelength ){
106 106 band_index(p, page);
  107 + if(PROGRESS) progress = 100;
107 108 return true;
108 109 }
109 110  
... ... @@ -114,6 +115,7 @@ public:
114 115 // (the wavelength is out of bounds)
115 116 if (page == Z()) {
116 117 band_index(p, Z()-1); //return the last band
  118 + if(PROGRESS) progress = 100;
117 119 return true;
118 120 }
119 121 }
... ... @@ -561,12 +563,12 @@ public:
561 563 free(src[1]);
562 564 free(dst[0]);
563 565 free(dst[1]);
564   - //if(VERBOSE){
  566 + if(VERBOSE){
565 567 std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl;
566 568 std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl;
567 569 std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl;
568 570 std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl;
569   - //}
  571 + }
570 572 return true; //return true
571 573 }
572 574  
... ... @@ -1120,27 +1122,61 @@ public:
1120 1122  
1121 1123 /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum
1122 1124 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1123   - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){
  1125 + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){
1124 1126 unsigned long long XY = X() * Y();
1125   - unsigned long long count = 0; //count will store the number of masked pixels
  1127 + unsigned long long count = nnz(mask); //count will store the number of masked pixels
1126 1128 T* temp = (T*)malloc(sizeof(T) * XY);
1127   - //calculate this loop counts the number of true pixels in the mask
1128   - for (unsigned j = 0; j < XY; j++){
1129   - if (mask == NULL || mask[j] != 0){
1130   - count++;
1131   - }
1132   - }
  1129 +
1133 1130 //this loops goes through each band in B (Z())
1134 1131 // masked (or valid) pixels from that band are averaged and the average is stored in p
  1132 + double e_x; //stores E[x]^2
  1133 + double e_x2; //stores E[x^2]
  1134 + double x;
1135 1135 for (unsigned long long i = 0; i < Z(); i++){
1136   - p[i] = 0;
  1136 + e_x = 0;
  1137 + e_x2 = 0;
1137 1138 band_index(temp, i); //get the band image and store it in temp
1138 1139 for (unsigned long long j = 0; j < XY; j++){ //loop through temp, averaging valid pixels
1139 1140 if (mask == NULL || mask[j] != 0){
1140   - p[i] += (double)temp[j] / (double)count;
  1141 + x = (double)temp[j];
  1142 + e_x += x / (double)count; //sum the expected value of x
  1143 + e_x2 += (x * x) / (double)count; //sum the expected value of x^2
1141 1144 }
1142 1145 }
1143   - if(PROGRESS) progress = (double)(i+1) / Z() * 100;
  1146 + m[i] = e_x; //store the mean
  1147 + std[i] = sqrt(e_x2 - e_x * e_x); //calculate the standard deviation
  1148 + if(PROGRESS) progress = (double)(i+1) / Z() * 100; //update the progress counter
  1149 + }
  1150 + free(temp);
  1151 + return true;
  1152 + }
  1153 +
  1154 + /// Calculate the median value for all masked (or valid) pixels in a band and returns the median spectrum
  1155 +
  1156 + /// @param m is a pointer to pre-allocated memory of size [B * sizeof(double)] that stores the median spectrum
  1157 + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
  1158 + bool median_spectrum(double* m, unsigned char* mask = NULL, bool PROGRESS = false){
  1159 + size_t XY = X() * Y();
  1160 + size_t count = nnz(mask); //count will store the number of masked pixels
  1161 + T* temp = (T*)malloc(sizeof(T) * XY);
  1162 +
  1163 + std::vector<T> band_values(count); //create an STD vector of band values
  1164 +
  1165 + //this loops goes through each band in B (Z())
  1166 + // masked (or valid) pixels from that band are averaged and the average is stored in p
  1167 + size_t k;
  1168 + for (size_t i = 0; i < Z(); i++){ //for each band
  1169 + band_index(temp, i); //get the band image and store it in temp
  1170 + k = 0; //initialize the band_value index to zero
  1171 + for (size_t j = 0; j < XY; j++){ //loop through temp, averaging valid pixels
  1172 + if (mask == NULL || mask[j] != 0){
  1173 + band_values[k] = temp[j]; //store the value in the band_values array
  1174 + k++; //increment the band_values index
  1175 + }
  1176 + }
  1177 + std::sort(band_values.begin(), band_values.end()); //sort all of the values in the band
  1178 + m[i] = band_values[ count/2 ]; //store the center value in the array
  1179 + if(PROGRESS) progress = (double)(i+1) / Z() * 100; //update the progress counter
1144 1180 }
1145 1181 free(temp);
1146 1182 return true;
... ... @@ -1203,6 +1239,52 @@ public:
1203 1239 return true;
1204 1240 }
1205 1241  
  1242 + ///Crop out several subimages and assemble a new image from these concatenated subimages
  1243 +
  1244 + /// @param outfile is the file name for the output image
  1245 + /// @param sx is the width of each subimage
  1246 + /// @param sy is the height of each subimage
  1247 + /// @param mask is the mask used to define subimage positions extracted from the input file
  1248 + void subimages(std::string outfile, size_t sx, size_t sy, unsigned char* mask, bool PROGRESS = false){
  1249 +
  1250 + size_t N = nnz(mask); //get the number of subimages
  1251 + T* dst = (T*) malloc(N * sx * sy * sizeof(T)); //allocate space for a single band of the output image
  1252 + memset(dst, 0, N*sx*sy*sizeof(T)); //initialize the band image to zero
  1253 +
  1254 + std::ofstream out(outfile, std::ios::binary); //open a file for writing
  1255 +
  1256 + T* src = (T*) malloc(X() * Y() * sizeof(T));
  1257 +
  1258 + for(size_t b = 0; b < Z(); b++){ //for each band
  1259 + band_index(src, b); //load the band image
  1260 + size_t i = 0; //create an image index and initialize it to zero
  1261 + size_t n = 0;
  1262 + while(n < N){ //for each subimage
  1263 + if(mask[i]){ //if the pixel is masked, copy the surrounding pixels into the destination band
  1264 + size_t yi = i / X(); //determine the y position of the current pixel
  1265 + size_t xi = i - yi * X(); //determine the x position of the current pixel
  1266 + if( xi > sx/2 && xi < X() - sx/2 && //if the subimage is completely within the bounds of the original image
  1267 + yi > sy/2 && yi < Y() - sy/2){
  1268 + size_t cx = xi - sx/2; //calculate the corner position for the subimage
  1269 + size_t cy = yi - sy/2;
  1270 + for(size_t syi = 0; syi < sy; syi++){ //for each line in the subimage
  1271 + size_t src_i = (cy + syi) * X() + cx;
  1272 + //size_t dst_i = syi * (N * sx) + n * sx;
  1273 + size_t dst_i = (n * sy + syi) * sx;
  1274 + memcpy(&dst[dst_i], &src[src_i], sx * sizeof(T)); //copy one line from the subimage to the destination image
  1275 + }
  1276 + n++;
  1277 + }
  1278 + }
  1279 + i++;
  1280 + if(PROGRESS) progress = (double)( (n+1) * (b+1) ) / (N * Z()) * 100;
  1281 + }//end while n
  1282 + out.write((const char*)dst, N * sx * sy * sizeof(T)); //write the band to memory
  1283 + }
  1284 + free(dst); //free memory
  1285 + free(src);
  1286 + }
  1287 +
1206 1288 /// Remove a list of bands from the ENVI file
1207 1289  
1208 1290 /// @param outfile is the file name for the output hyperspectral image (with trimmed bands)
... ...
stim/envi/envi.h
... ... @@ -6,6 +6,8 @@
6 6 #include "../envi/bip.h"
7 7 #include "../envi/bil.h"
8 8 #include "../math/fd_coefficients.h"
  9 +#include <stim/parser/filename.h>
  10 +#include <stim/util/filesize.h>
9 11 #include <iostream>
10 12 #include <fstream>
11 13 //#include "../image/image.h"
... ... @@ -76,7 +78,31 @@ public:
76 78  
77 79 allocate();
78 80 }
  81 + //used to test if the current ENVI file is valid
  82 + operator bool(){
  83 + if(file == NULL) return false;
  84 + return true;
  85 + }
  86 +
  87 + //test to determine if the specified file is an ENVI file
  88 + static bool is_envi(std::string fname, std::string hname = ""){
  89 + stim::filename data_file(fname);
  90 + stim::filename header_file;
  91 + if(hname == ""){ //if the header isn't provided
  92 + header_file = data_file; //assume that it's the same name as the data file, with a .hdr extension
  93 + header_file = header_file.extension("hdr");
  94 + }
  95 + else header_file = hname; //otherwise load the passed header
  96 +
  97 + stim::envi_header H;
  98 + if(H.load(header_file) == false) //load the header file, if it doesn't load return false
  99 + return false;
  100 + size_t targetBytes = H.data_bytes(); //get the number of bytes that SHOULD be in the data file
  101 + size_t bytes = stim::file_size(fname);
  102 + if(bytes != targetBytes) return false; //if the data doesn't match the header, return false
  103 + return true; //otherwise everything looks fine
79 104  
  105 + }
80 106  
81 107  
82 108 void* malloc_spectrum(){
... ... @@ -359,11 +385,23 @@ public:
359 385  
360 386 fseek(f, 9, SEEK_SET); //seek to the number of bands
361 387 short b; //allocate space for the number of bands
362   - fread(&b, sizeof(short), 1, f); //read the number of bands
  388 + size_t nread = fread(&b, sizeof(short), 1, f); //read the number of bands
  389 + if(nread != 1){
  390 + std::cout<<"Error reading band number from Agilent file."<<std::endl;
  391 + exit(1);
  392 + }
363 393 fseek(f, 13, SEEK_CUR); //skip the the x and y dimensions
364 394 short x, y;
365   - fread(&x, sizeof(short), 1, f); //read the image x and y size
366   - fread(&y, sizeof(short), 1, f);
  395 + nread = fread(&x, sizeof(short), 1, f); //read the image x and y size
  396 + if(nread != 1){
  397 + std::cout<<"Error reading X dimension from Agilent file."<<std::endl;
  398 + exit(1);
  399 + }
  400 + nread = fread(&y, sizeof(short), 1, f);
  401 + if(nread != 1){
  402 + std::cout<<"Error reading Y dimension from Agilent file."<<std::endl;
  403 + exit(1);
  404 + }
367 405 fclose(f); //close the file
368 406  
369 407 //store the information from the Agilent header in the ENVI header
... ... @@ -1368,12 +1406,12 @@ public:
1368 1406  
1369 1407 /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum
1370 1408 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1371   - bool avg_band(double * p, unsigned char* mask, bool PROGRESS = false){
  1409 + bool mean_spectrum(double * p, double* std, unsigned char* mask, bool PROGRESS = false){
1372 1410 if (header.interleave == envi_header::BSQ){
1373 1411 if (header.data_type == envi_header::float32)
1374   - return ((bsq<float>*)file)->avg_band(p, mask, PROGRESS);
  1412 + return ((bsq<float>*)file)->mean_spectrum(p, std, mask, PROGRESS);
1375 1413 else if (header.data_type == envi_header::float64)
1376   - return ((bsq<double>*)file)->avg_band(p, mask, PROGRESS);
  1414 + return ((bsq<double>*)file)->mean_spectrum(p, std, mask, PROGRESS);
1377 1415 else{
1378 1416 std::cout << "ERROR: unidentified data type" << std::endl;
1379 1417 exit(1);
... ... @@ -1381,9 +1419,9 @@ public:
1381 1419 }
1382 1420 else if (header.interleave == envi_header::BIL){
1383 1421 if (header.data_type == envi_header::float32)
1384   - return ((bil<float>*)file)->avg_band(p, mask, PROGRESS);
  1422 + return ((bil<float>*)file)->mean_spectrum(p, std, mask, PROGRESS);
1385 1423 else if (header.data_type == envi_header::float64)
1386   - return ((bil<double>*)file)->avg_band(p, mask, PROGRESS);
  1424 + return ((bil<double>*)file)->mean_spectrum(p, std, mask, PROGRESS);
1387 1425 else{
1388 1426 std::cout << "ERROR: unidentified data type" << std::endl;
1389 1427 exit(1);
... ... @@ -1391,14 +1429,36 @@ public:
1391 1429 }
1392 1430 else if (header.interleave == envi_header::BIP){
1393 1431 if (header.data_type == envi_header::float32)
1394   - return ((bip<float>*)file)->avg_band(p, mask, PROGRESS);
  1432 + return ((bip<float>*)file)->mean_spectrum(p, std, mask, PROGRESS);
  1433 + else if (header.data_type == envi_header::float64)
  1434 + return ((bip<double>*)file)->mean_spectrum(p, std, mask, PROGRESS);
  1435 + else{
  1436 + std::cout << "ERROR: unidentified data type" << std::endl;
  1437 + exit(1);
  1438 + }
  1439 + }
  1440 + return false;
  1441 + }
  1442 +
  1443 + /// Calculate the median value for all masked (or valid) pixels in a band and returns the median spectrum
  1444 +
  1445 + /// @param m is a pointer to pre-allocated memory of size [B * sizeof(double)] that stores the median spectrum
  1446 + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
  1447 + bool median_spectrum(double* m, unsigned char* mask, bool PROGRESS = false){
  1448 + if (header.interleave == envi_header::BSQ){
  1449 + if (header.data_type == envi_header::float32)
  1450 + return ((bsq<float>*)file)->median_spectrum(m, mask, PROGRESS);
1395 1451 else if (header.data_type == envi_header::float64)
1396   - return ((bip<double>*)file)->avg_band(p, mask, PROGRESS);
  1452 + return ((bsq<double>*)file)->median_spectrum(m, mask, PROGRESS);
1397 1453 else{
1398 1454 std::cout << "ERROR: unidentified data type" << std::endl;
1399 1455 exit(1);
1400 1456 }
1401 1457 }
  1458 + else{
  1459 + std::cout<<"ERROR: median calculation is only supported for BSQ interleave types. Convert to process."<<std::endl;
  1460 + exit(1);
  1461 + }
1402 1462 return false;
1403 1463 }
1404 1464  
... ... @@ -1407,16 +1467,16 @@ public:
1407 1467 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1408 1468 /// @param avg is a pointer to memory of size B that stores the average spectrum
1409 1469 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1410   - bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){
  1470 + bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){
1411 1471 if (header.interleave == envi_header::BSQ){
1412 1472 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
1413 1473 exit(1);
1414 1474 }
1415 1475 else if (header.interleave == envi_header::BIL){
1416 1476 if (header.data_type == envi_header::float32)
1417   - return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1477 + return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1418 1478 else if (header.data_type == envi_header::float64)
1419   - return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1479 + return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1420 1480 else{
1421 1481 std::cout << "ERROR: unidentified data type" << std::endl;
1422 1482 exit(1);
... ... @@ -1424,9 +1484,9 @@ public:
1424 1484 }
1425 1485 else if (header.interleave == envi_header::BIP){
1426 1486 if (header.data_type == envi_header::float32)
1427   - return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1487 + return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1428 1488 else if (header.data_type == envi_header::float64)
1429   - return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1489 + return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1430 1490 else{
1431 1491 std::cout << "ERROR: unidentified data type" << std::endl;
1432 1492 exit(1);
... ... @@ -1440,7 +1500,7 @@ public:
1440 1500 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1441 1501 /// @param avg is a pointer to memory of size B that stores the average spectrum
1442 1502 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1443   - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){
  1503 + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){
1444 1504 if (header.interleave == envi_header::BSQ){
1445 1505 std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
1446 1506 exit(1);
... ... @@ -1454,9 +1514,9 @@ public:
1454 1514  
1455 1515 else if (header.interleave == envi_header::BIP){
1456 1516 if (header.data_type == envi_header::float32)
1457   - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1517 + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
1458 1518 else if (header.data_type == envi_header::float64)
1459   - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1519 + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
1460 1520 else{
1461 1521 std::cout << "ERROR: unidentified data type" << std::endl;
1462 1522 exit(1);
... ... @@ -1524,6 +1584,41 @@ public:
1524 1584 return false;
1525 1585 }
1526 1586  
  1587 + void subimages(std::string outfile, size_t nx, size_t ny, unsigned char* mask, bool PROGRESS = false){
  1588 +
  1589 + size_t nnz = 0; //initialize the number of subimages to zero
  1590 + for(size_t i = 0; i < header.lines * header.samples; i++) //for each pixel in the mask
  1591 + if(mask[i]) nnz++; //if the pixel is valid, add a subimage
  1592 +
  1593 +
  1594 + //save the header for the cropped file
  1595 + stim::envi_header new_header = header;
  1596 + new_header.samples = nx; //calculate the width of the output image (concatenated subimages)
  1597 + new_header.lines = nnz * ny; //calculate the height of the output image (height of subimages)
  1598 +
  1599 +
  1600 + if (header.interleave == envi_header::BSQ){
  1601 + if (header.data_type == envi_header::float32)
  1602 + ((bsq<float>*)file)->subimages(outfile, nx, ny, mask, PROGRESS);
  1603 + else if (header.data_type == envi_header::float64)
  1604 + ((bsq<double>*)file)->subimages(outfile, nx, ny, mask, PROGRESS);
  1605 + else{
  1606 + std::cout << "ERROR: unidentified data type" << std::endl;
  1607 + exit(1);
  1608 + }
  1609 + }
  1610 + else if (header.interleave == envi_header::BIL){
  1611 + std::cout << "ERROR: unidentified data type" << std::endl;
  1612 + exit(1);
  1613 + }
  1614 + else if (header.interleave == envi_header::BIP){
  1615 + std::cout << "ERROR: unidentified data type" << std::endl;
  1616 + exit(1);
  1617 + }
  1618 +
  1619 + new_header.save(outfile + ".hdr"); //save the header for the output file
  1620 + }
  1621 +
1527 1622 /// Remove a list of bands from the ENVI file
1528 1623  
1529 1624 /// @param outfile is the file name for the output hyperspectral image (with trimmed bands)
... ... @@ -1801,6 +1896,44 @@ public:
1801 1896 }
1802 1897 exit(1);
1803 1898 }
  1899 +
  1900 +
  1901 +
  1902 +
  1903 + void fft(std::string outfile, double band_min, double band_max, size_t samples = 0, void* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int cuda_device = 0){
  1904 + if(samples == 0) samples = header.bands;
  1905 + double B = (double)header.bands;
  1906 + double delta = header.wavelength[1] - header.wavelength[0]; //calculate spacing in the current domain
  1907 + double span = samples * delta; //calculate the span in the current domain
  1908 + double fft_delta = 1.0 / span; //calculate the span in the FFT domain
  1909 + double fft_max = fft_delta * samples/2; //calculate the maximum range of the FFT
  1910 +
  1911 + if(band_max > fft_max) band_max = fft_max; //the user gave a band outside of the FFT range, reset the band to the maximum available
  1912 + size_t start_i = (size_t)std::ceil(band_min / fft_delta); //calculate the first band to store
  1913 + size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i + 1; //calculate the number of bands to store
  1914 + size_t end_i = start_i + size_i - 1; //last band number
  1915 +
  1916 + envi_header new_header = header;
  1917 + new_header.bands = size_i;
  1918 + new_header.set_wavelengths(start_i * fft_delta, fft_delta);
  1919 + new_header.wavelength_units = "inv_" + header.wavelength_units;
  1920 + new_header.save(outfile + ".hdr");
  1921 +
  1922 + if (header.interleave == envi_header::BIP){
  1923 + if (header.data_type == envi_header::float32)
  1924 + ((bip<float>*)file)->fft(outfile, start_i, end_i, samples, (float*)ratio, rx, ry, PROGRESS, cuda_device);
  1925 + else if (header.data_type == envi_header::float64)
  1926 + ((bip<double>*)file)->fft(outfile, start_i, end_i, samples, (double*)ratio, rx, ry, PROGRESS, cuda_device);
  1927 + else{
  1928 + std::cout << "ERROR: unidentified data type" << std::endl;
  1929 + exit(1);
  1930 + }
  1931 + }
  1932 + else{
  1933 + std::cout<<"ERROR: only BIP files supported for FFT"<<std::endl;
  1934 + exit(1);
  1935 + }
  1936 + }
1804 1937 }; //end ENVI
1805 1938  
1806 1939 } //end namespace rts
... ...
stim/envi/envi_header.h
... ... @@ -78,6 +78,14 @@ struct envi_header
78 78 load(name);
79 79 }
80 80  
  81 + //sets the wavelength vector given a starting value and uniform step size
  82 + void set_wavelengths(double start, double step){
  83 + size_t B = bands; //get the number of bands
  84 + wavelength.resize(B);
  85 + for(size_t b = 0; b < B; b++)
  86 + wavelength[b] = start + b * step;
  87 + }
  88 +
81 89 std::string trim(std::string line){
82 90  
83 91 if(line.length() == 0)
... ... @@ -417,8 +425,13 @@ struct envi_header
417 425 default:
418 426 return 0;
419 427 }
  428 + }
420 429  
  430 + //return the number of bytes that SHOULD be in the data file
  431 + size_t data_bytes(){
  432 + return samples * lines * bands * valsize() + header_offset;
421 433 }
  434 +
422 435  
423 436 /// Convert an interleave type to a string
424 437 static std::string interleave_str(interleaveType t){
... ...
stim/envi/hsi.h
... ... @@ -142,7 +142,7 @@ public:
142 142 void mask_finite(unsigned char* out_mask, unsigned char* mask, bool PROGRESS = false){
143 143 size_t XY = X() * Y();
144 144 if(mask == NULL) //if no mask is provided
145   - memset(mask, 255, XY * sizeof(unsigned char)); //initialize the mask to 255
  145 + memset(out_mask, 255, XY * sizeof(unsigned char)); //initialize the mask to 255
146 146 else //if a mask is provided
147 147 memcpy(out_mask, mask, XY * sizeof(unsigned char)); //initialize the current mask to that one
148 148 T* page = (T*)malloc(R[0] * R[1] * sizeof(T)); //allocate space for a page of data
... ...
stim/gl/error.h
... ... @@ -12,4 +12,5 @@
12 12 } \
13 13 }
14 14  
  15 +
15 16 #endif
16 17 \ No newline at end of file
... ...
stim/gl/gl_spider.h
... ... @@ -479,7 +479,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
479 479 glEndList(); ///finilize the display list.
480 480 #ifdef DEBUG
481 481 for(int i = 0; i < numSamplesPos; i++)
482   - std::cout << pV[i] << std::endl;
  482 + std::cout << pV[i].str() << std::endl;
483 483 #endif
484 484 }
485 485  
... ... @@ -1151,8 +1151,8 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1151 1151 out[3] = temp[2];
1152 1152 }
1153 1153 #ifdef DEBUG
1154   -// std::cout << "out is " << out << std::endl;
1155   -// std::cout << "when rotating from " << from << " to " << dir << std::endl;
  1154 + std::cout << "out is " << out.str() << std::endl;
  1155 + std::cout << "when rotating from " << from.str() << " to " << dir.str() << std::endl;
1156 1156 #endif
1157 1157 return out;
1158 1158 }
... ... @@ -1545,7 +1545,7 @@ class gl_spider // : public virtual gl_texture&lt;T&gt;
1545 1545 setMagnitude(curSeedMag);
1546 1546  
1547 1547 #ifdef DEBUG
1548   - std::cout << "The new seed " << curSeed << curSeedVec << curSeedMag << std::endl;
  1548 + std::cout << "The new seed " << curSeed.str() << curSeedVec.str() << curSeedMag << std::endl;
1549 1549 #endif
1550 1550  
1551 1551 // Bind(direction_texID, direction_buffID, numSamples, n_pixels);
... ...
stim/grids/image_stack.h
... ... @@ -139,7 +139,7 @@ public:
139 139 /// @param depth, number of pixels in depth.
140 140 void init(int channels, int width, int height, int depth)
141 141 {
142   - R.resize(4);
  142 + //R.resize(4);
143 143 R[0] = channels;
144 144 R[1] = width;
145 145 R[2] = height;
... ...
stim/image/image.h
... ... @@ -10,6 +10,7 @@
10 10 #include <limits>
11 11 #include <typeinfo>
12 12 #include <fstream>
  13 +#include <cstring>
13 14  
14 15 namespace stim{
15 16 /// This static class provides the STIM interface for loading, saving, and storing 2D images.
... ... @@ -74,18 +75,7 @@ class image{
74 75 #endif
75 76 /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images)
76 77 T white(){
77   -
78   - if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX;
79   - if(typeid(T) == typeid(unsigned short)) return SHRT_MAX;
80   - if(typeid(T) == typeid(unsigned)) return UINT_MAX;
81   - if(typeid(T) == typeid(unsigned long)) return ULONG_MAX;
82   - if(typeid(T) == typeid(unsigned long long)) return ULLONG_MAX;
83   - if(typeid(T) == typeid(float)) return 1.0f;
84   - if(typeid(T) == typeid(double)) return 1.0;
85   -
86   - std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl;
87   - exit(1);
88   -
  78 + return std::numeric_limits<T>::max();
89 79 }
90 80  
91 81  
... ...
stim/math/.vec3.h.swp 0 → 100644
No preview for this file type
stim/math/.vec3_BASE_62876.h.swp 0 → 100644
No preview for this file type
stim/math/.vec3_LOCAL_62876.h.swp 0 → 100644
No preview for this file type
stim/math/.vec3_REMOTE_62876.h.swp 0 → 100644
No preview for this file type
stim/math/bessel - Copy.h 0 → 100644
  1 +#ifndef RTS_BESSEL_H
  2 +#define RTS_BESSEL_H
  3 +
  4 +#define _USE_MATH_DEFINES
  5 +#include <math.h>
  6 +#include "../math/complex.h"
  7 +#define eps 1e-15
  8 +#define el 0.5772156649015329
  9 +
  10 +
  11 +namespace stim{
  12 +
  13 +static complex<double> cii(0.0,1.0);
  14 +static complex<double> cone(1.0,0.0);
  15 +static complex<double> czero(0.0,0.0);
  16 +
/// Evaluates the gamma function for a real argument.
/// - x > 171 returns 1e308, which is used as an overflow flag.
/// - Positive integers are evaluated as the factorial (x-1)!.
/// - Zero and negative integers return 1e308 (the function has poles there).
/// - All other arguments use a 25-term polynomial for 1/gamma on the fractional
///   part, followed by the recurrence (|x| > 1) and reflection (x < 0) formulas.
template< typename P >
P gamma(P x)
{
	//polynomial coefficients of the series used for 1/gamma(z)
	static P coef[] = {
		1.0,
		0.5772156649015329,
		-0.6558780715202538,
		-0.420026350340952e-1,
		0.1665386113822915,
		-0.421977345555443e-1,
		-0.9621971527877e-2,
		0.7218943246663e-2,
		-0.11651675918591e-2,
		-0.2152416741149e-3,
		0.1280502823882e-3,
		-0.201348547807e-4,
		-0.12504934821e-5,
		0.1133027232e-5,
		-0.2056338417e-6,
		0.6116095e-8,
		0.50020075e-8,
		-0.11812746e-8,
		0.1043427e-9,
		0.77823e-11,
		-0.36968e-11,
		0.51e-12,
		-0.206e-13,
		-0.54e-14,
		0.14e-14};

	if (x > 171.0) return 1e308;		// This value is an overflow flag.

	P result;

	//integer arguments: factorial for positive values, flag value otherwise
	if (x == (int)x) {
		if (x > 0.0) {
			result = 1.0;
			for (int i = 2; i < x; i++)
				result *= i;
		}
		else {
			result = 1e308;
		}
		return result;
	}

	//non-integer arguments
	const bool beyond_unit = fabs(x) > 1.0;
	P z;					//argument reduced to the unit interval
	P shift = 1.0;				//product that undoes the reduction (only used when beyond_unit)
	if (beyond_unit) {
		z = fabs(x);
		const int whole = (int)z;
		shift = 1.0;
		for (int k = 1; k <= whole; k++)
			shift *= (z-k);
		z -= whole;
	}
	else {
		z = x;
	}

	//Horner evaluation of the polynomial
	P poly = coef[24];
	for (int k = 23; k >= 0; k--)
		poly = poly*z+coef[k];

	result = 1.0/(poly*z);
	if (beyond_unit) {
		result *= shift;
		if (x < 0.0)
			result = -M_PI/(x*result*sin(M_PI*x));	//reflection for negative arguments
	}
	return result;
}
  87 +
  88 +template<typename P>
  89 +int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
  90 + P &j0p,P &j1p,P &y0p,P &y1p)
  91 +{
  92 + P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
  93 + P cu,p0,q0,p1,q1,t1,t2;
  94 + int k,kz;
  95 + static P a[] = {
  96 + -7.03125e-2,
  97 + 0.112152099609375,
  98 + -0.5725014209747314,
  99 + 6.074042001273483,
  100 + -1.100171402692467e2,
  101 + 3.038090510922384e3,
  102 + -1.188384262567832e5,
  103 + 6.252951493434797e6,
  104 + -4.259392165047669e8,
  105 + 3.646840080706556e10,
  106 + -3.833534661393944e12,
  107 + 4.854014686852901e14,
  108 + -7.286857349377656e16,
  109 + 1.279721941975975e19};
  110 + static P b[] = {
  111 + 7.32421875e-2,
  112 + -0.2271080017089844,
  113 + 1.727727502584457,
  114 + -2.438052969955606e1,
  115 + 5.513358961220206e2,
  116 + -1.825775547429318e4,
  117 + 8.328593040162893e5,
  118 + -5.006958953198893e7,
  119 + 3.836255180230433e9,
  120 + -3.649010818849833e11,
  121 + 4.218971570284096e13,
  122 + -5.827244631566907e15,
  123 + 9.476288099260110e17,
  124 + -1.792162323051699e20};
  125 + static P a1[] = {
  126 + 0.1171875,
  127 + -0.1441955566406250,
  128 + 0.6765925884246826,
  129 + -6.883914268109947,
  130 + 1.215978918765359e2,
  131 + -3.302272294480852e3,
  132 + 1.276412726461746e5,
  133 + -6.656367718817688e6,
  134 + 4.502786003050393e8,
  135 + -3.833857520742790e10,
  136 + 4.011838599133198e12,
  137 + -5.060568503314727e14,
  138 + 7.572616461117958e16,
  139 + -1.326257285320556e19};
  140 + static P b1[] = {
  141 + -0.1025390625,
  142 + 0.2775764465332031,
  143 + -1.993531733751297,
  144 + 2.724882731126854e1,
  145 + -6.038440767050702e2,
  146 + 1.971837591223663e4,
  147 + -8.902978767070678e5,
  148 + 5.310411010968522e7,
  149 + -4.043620325107754e9,
  150 + 3.827011346598605e11,
  151 + -4.406481417852278e13,
  152 + 6.065091351222699e15,
  153 + -9.833883876590679e17,
  154 + 1.855045211579828e20};
  155 +
  156 + if (x < 0.0) return 1;
  157 + if (x == 0.0) {
  158 + j0 = 1.0;
  159 + j1 = 0.0;
  160 + y0 = -1e308;
  161 + y1 = -1e308;
  162 + j0p = 0.0;
  163 + j1p = 0.5;
  164 + y0p = 1e308;
  165 + y1p = 1e308;
  166 + return 0;
  167 + }
  168 + x2 = x*x;
  169 + if (x <= 12.0) {
  170 + j0 = 1.0;
  171 + r = 1.0;
  172 + for (k=1;k<=30;k++) {
  173 + r *= -0.25*x2/(k*k);
  174 + j0 += r;
  175 + if (fabs(r) < fabs(j0)*1e-15) break;
  176 + }
  177 + j1 = 1.0;
  178 + r = 1.0;
  179 + for (k=1;k<=30;k++) {
  180 + r *= -0.25*x2/(k*(k+1));
  181 + j1 += r;
  182 + if (fabs(r) < fabs(j1)*1e-15) break;
  183 + }
  184 + j1 *= 0.5*x;
  185 + ec = log(0.5*x)+el;
  186 + cs0 = 0.0;
  187 + w0 = 0.0;
  188 + r0 = 1.0;
  189 + for (k=1;k<=30;k++) {
  190 + w0 += 1.0/k;
  191 + r0 *= -0.25*x2/(k*k);
  192 + r = r0 * w0;
  193 + cs0 += r;
  194 + if (fabs(r) < fabs(cs0)*1e-15) break;
  195 + }
  196 + y0 = M_2_PI*(ec*j0-cs0);
  197 + cs1 = 1.0;
  198 + w1 = 0.0;
  199 + r1 = 1.0;
  200 + for (k=1;k<=30;k++) {
  201 + w1 += 1.0/k;
  202 + r1 *= -0.25*x2/(k*(k+1));
  203 + r = r1*(2.0*w1+1.0/(k+1));
  204 + cs1 += r;
  205 + if (fabs(r) < fabs(cs1)*1e-15) break;
  206 + }
  207 + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1);
  208 + }
  209 + else {
  210 + if (x >= 50.0) kz = 8; // Can be changed to 10
  211 + else if (x >= 35.0) kz = 10; // " " 12
  212 + else kz = 12; // " " 14
  213 + t1 = x-M_PI_4;
  214 + p0 = 1.0;
  215 + q0 = -0.125/x;
  216 + for (k=0;k<kz;k++) {
  217 + p0 += a[k]*pow(x,-2*k-2);
  218 + q0 += b[k]*pow(x,-2*k-3);
  219 + }
  220 + cu = sqrt(M_2_PI/x);
  221 + j0 = cu*(p0*cos(t1)-q0*sin(t1));
  222 + y0 = cu*(p0*sin(t1)+q0*cos(t1));
  223 + t2 = x-0.75*M_PI;
  224 + p1 = 1.0;
  225 + q1 = 0.375/x;
  226 + for (k=0;k<kz;k++) {
  227 + p1 += a1[k]*pow(x,-2*k-2);
  228 + q1 += b1[k]*pow(x,-2*k-3);
  229 + }
  230 + j1 = cu*(p1*cos(t2)-q1*sin(t2));
  231 + y1 = cu*(p1*sin(t2)+q1*cos(t2));
  232 + }
  233 + j0p = -j1;
  234 + j1p = j0-j1/x;
  235 + y0p = -y1;
  236 + y1p = y0-y1/x;
  237 + return 0;
  238 +}
//
//  Bessel functions of orders 0 and 1 using polynomial approximations.
//
//  INPUT:
//      x    -- argument of Bessel function (must be non-negative)
//
//  OUTPUT:
//      j0   -- Bessel function of 1st kind, 0th order
//      j1   -- Bessel function of 1st kind, 1st order
//      y0   -- Bessel function of 2nd kind, 0th order
//      y1   -- Bessel function of 2nd kind, 1st order
//      j0p  -- derivative of Bessel function of 1st kind, 0th order
//      j1p  -- derivative of Bessel function of 1st kind, 1st order
//      y0p  -- derivative of Bessel function of 2nd kind, 0th order
//      y1p  -- derivative of Bessel function of 2nd kind, 1st order
//
//  RETURN:
//      int error code: 0 = OK, 1 = error (negative argument)
//
template<typename P>
int bessjy01b(P x,P &j0,P &j1,P &y0,P &y1,
	P &j0p,P &j1p,P &y0p,P &y1p)
{
	if (x < 0.0) return 1;

	//limiting values at the origin (1e308 stands in for infinity)
	if (x == 0.0) {
		j0 = 1.0;
		j1 = 0.0;
		y0 = -1e308;
		y1 = -1e308;
		j0p = 0.0;
		j1p = 0.5;
		y0p = 1e308;
		y1p = 1e308;
		return 0;
	}

	P t, t2;
	if (x > 4.0) {
		//large arguments: amplitude/phase form with polynomials in (4/x)
		P a0, p0, q0, p1, q1, ta0, ta1;
		t = 4.0/x;
		t2 = t*t;
		a0 = sqrt(M_2_PI/x);
		p0 = ((((-0.9285e-5*t2+0.43506e-4)*t2-0.122226e-3)*t2+
			0.434725e-3)*t2-0.4394275e-2)*t2+0.999999997;
		q0 = t*(((((0.8099e-5*t2-0.35614e-4)*t2+0.85844e-4)*t2-
			0.218024e-3)*t2+0.1144106e-2)*t2-0.031249995);
		ta0 = x-M_PI_4;
		j0 = a0*(p0*cos(ta0)-q0*sin(ta0));
		y0 = a0*(p0*sin(ta0)+q0*cos(ta0));
		p1 = ((((0.10632e-4*t2-0.50363e-4)*t2+0.145575e-3)*t2
			-0.559487e-3)*t2+0.7323931e-2)*t2+1.000000004;
		q1 = t*(((((-0.9173e-5*t2+0.40658e-4)*t2-0.99941e-4)*t2
			+0.266891e-3)*t2-0.1601836e-2)*t2+0.093749994);
		ta1 = x-0.75*M_PI;
		j1 = a0*(p1*cos(ta1)-q1*sin(ta1));
		y1 = a0*(p1*sin(ta1)+q1*cos(ta1));
	}
	else {
		//small arguments: polynomials in (x/4)^2
		P dtmp;
		t = x/4.0;
		t2 = t*t;
		j0 = ((((((-0.5014415e-3*t2+0.76771853e-2)*t2-0.0709253492)*t2+
			0.4443584263)*t2-1.7777560599)*t2+3.9999973021)*t2
			-3.9999998721)*t2+1.0;
		j1 = t*(((((((-0.1289769e-3*t2+0.22069155e-2)*t2-0.0236616773)*t2+
			0.1777582922)*t2-0.8888839649)*t2+2.6666660544)*t2-
			3.999999971)*t2+1.9999999998);
		dtmp = (((((((-0.567433e-4*t2+0.859977e-3)*t2-0.94855882e-2)*t2+
			0.0772975809)*t2-0.4261737419)*t2+1.4216421221)*t2-
			2.3498519931)*t2+1.0766115157)*t2+0.3674669052;
		y0 = M_2_PI*log(0.5*x)*j0+dtmp;
		dtmp = (((((((0.6535773e-3*t2-0.0108175626)*t2+0.107657607)*t2-
			0.7268945577)*t2+3.1261399273)*t2-7.3980241381)*t2+
			6.8529236342)*t2+0.3932562018)*t2-0.6366197726;
		y1 = M_2_PI*log(0.5*x)*j1+dtmp/x;
	}

	//derivatives follow from the function values
	j0p = -j1;
	j1p = j0-j1/x;
	y0p = -y1;
	y1p = y0-y1/x;
	return 0;
}
/// Selects a starting order for backward recurrence using a secant iteration
/// (at most 20 steps) on the function
///     0.5*log10(6.28*n) - n*log10(1.36*|x|/n) - mp
/// @param x is the argument of the Bessel function
/// @param mp is the magnitude (power of ten) subtracted in the function above
/// @return the last order estimate produced by the iteration
template<typename P>
int msta1(P x,int mp)
{
	const P mag = fabs(x);

	//two initial guesses for the secant iteration
	int n_prev = (int)(1.1*mag)+1;
	P f_prev = 0.5*log10(6.28*n_prev)-n_prev*log10(1.36*mag/n_prev)-mp;
	int n_cur = n_prev+5;
	P f_cur = 0.5*log10(6.28*n_cur)-n_cur*log10(1.36*mag/n_cur)-mp;

	int n_next = 0;
	int step = 0;
	while (step < 20) {
		n_next = (int)(n_cur-(n_cur-n_prev)/(1.0-f_prev/f_cur));
		P f_next = 0.5*log10(6.28*n_next)-n_next*log10(1.36*mag/n_next)-mp;
		if (std::abs(n_next-n_cur) < 1) break;		//converged: the estimate stopped moving
		n_prev = n_cur;
		f_prev = f_cur;
		n_cur = n_next;
		f_cur = f_next;
		step++;
	}
	return n_next;
}
/// Selects a starting order for backward recurrence such that orders up to n
/// are obtained with mp significant digits. Uses the same secant iteration as
/// msta1 (at most 20 steps), with a target that depends on how the magnitude
/// estimate at order n compares with mp/2.
/// @param x is the argument of the Bessel function
/// @param n is the highest order required
/// @param mp is the number of significant digits requested
/// @return the last order estimate produced by the iteration, plus 10
template<typename P>
int msta2(P x,int n,int mp)
{
	const P mag = fabs(x);
	const P half_mp = 0.5*mp;
	const P est_n = 0.5*log10(6.28*n)-n*log10(1.36*mag/n);

	//choose the target value and the first guess
	P target;
	int n_prev;
	if (est_n > half_mp) {
		target = half_mp+est_n;
		n_prev = n;
	}
	else {
		target = mp;
		n_prev = (int)(1.1*mag);
		if (n_prev < 1) n_prev = 1;
	}

	P f_prev = 0.5*log10(6.28*n_prev)-n_prev*log10(1.36*mag/n_prev)-target;
	int n_cur = n_prev+5;
	P f_cur = 0.5*log10(6.28*n_cur)-n_cur*log10(1.36*mag/n_cur)-target;

	int n_next = 0;
	int step = 0;
	while (step < 20) {
		n_next = (int)(n_cur-(n_cur-n_prev)/(1.0-f_prev/f_cur));
		P f_next = 0.5*log10(6.28*n_next)-n_next*log10(1.36*mag/n_next)-target;
		if (std::abs(n_next-n_cur) < 1) break;		//converged: the estimate stopped moving
		n_prev = n_cur;
		f_prev = f_cur;
		n_cur = n_next;
		f_cur = f_next;
		step++;
	}
	return n_next+10;
}
//
//  INPUT:
//  double x    -- argument of Bessel function of 1st and 2nd kind.
//  int n       -- order
//
//  OUTPUT:
//
//  int nm      -- highest order actually computed (nm <= n)
//  double jn[] -- Bessel function of 1st kind, orders from 0 to nm
//  double yn[] -- Bessel function of 2nd kind, orders from 0 to nm
//  double j'n[]-- derivative of Bessel function of 1st kind,
//                      orders from 0 to nm
//  double y'n[]-- derivative of Bessel function of 2nd kind,
//                      orders from 0 to nm
//
//  RETURN:
//  int error code: 0 = OK, 1 = error (negative argument or negative order)
//
//  Computes Bessel functions of all order up to 'n' using recurrence
//  relations. If 'nm' < 'n' only 'nm' orders are returned.
//
//  NOTE: elements 0 and 1 of every output array are always written, so each
//  array must hold at least two elements even when n < 1.
//
template<typename P>
int bessjyna(int n,P x,int &nm,P *jn,P *yn,
	P *jnp,P *ynp)
{
	P bj0,bj1,f,f0,f1,f2,cs;
	int i,k,m;

	nm = n;
	if ((x < 0.0) || (n < 0)) return 1;

	//limiting values at the origin (1e308 stands in for infinity)
	if (x < 1e-15) {
		for (i=0;i<=n;i++) {
			jn[i] = 0.0;
			yn[i] = -1e308;
			jnp[i] = 0.0;
			ynp[i] = 1e308;
		}
		jn[0] = 1.0;
		jnp[1] = 0.5;
		return 0;
	}

	//orders 0 and 1 seed the recurrences (the call cannot fail here because x > 0)
	bessjy01a(x,jn[0],jn[1],yn[0],yn[1],jnp[0],jnp[1],ynp[0],ynp[1]);
	if (n < 2) return 0;
	bj0 = jn[0];
	bj1 = jn[1];

	//The cast must apply to the product: "(int)0.9*x" truncates 0.9 to 0 first,
	//which made this test always false and disabled the forward recurrence.
	if (n < (int)(0.9*x)) {
		//forward recurrence is stable while the order is below the argument
		for (k=2;k<=n;k++) {
			jn[k] = 2.0*(k-1.0)*bj1/x-bj0;
			bj0 = bj1;
			bj1 = jn[k];
		}
	}
	else {
		//backward recurrence from a starting order chosen by msta1/msta2
		m = msta1(x,200);
		if (m < n) nm = m;
		else m = msta2(x,n,15);
		f2 = 0.0;
		f1 = 1.0e-100;
		for (k=m;k>=0;k--) {
			f = 2.0*(k+1.0)/x*f1-f2;
			if (k <= nm) jn[k] = f;
			f2 = f1;
			f1 = f;
		}
		//normalize against whichever of J0/J1 is larger in magnitude
		if (fabs(bj0) > fabs(bj1)) cs = bj0/f;
		else cs = bj1/f2;
		for (k=0;k<=nm;k++) {
			jn[k] *= cs;
		}
	}
	for (k=2;k<=nm;k++) {
		jnp[k] = jn[k-1]-k*jn[k]/x;
	}

	//Yn by forward recurrence
	f0 = yn[0];
	f1 = yn[1];
	for (k=2;k<=nm;k++) {
		f = 2.0*(k-1.0)*f1/x-f0;
		yn[k] = f;
		f0 = f1;
		f1 = f;
	}
	for (k=2;k<=nm;k++) {
		ynp[k] = yn[k-1]-k*yn[k]/x;
	}
	return 0;
}
//
//  Same input and output conventions as bessjyna. Different recurrence
//  relations used for 'x' < 300.
//
//  Backward recurrence with a sum-based normalization is used when x <= 300 or
//  the order exceeds 0.9*x; otherwise a 4-term asymptotic expansion seeds a
//  forward recurrence.
//
template<typename P>
int bessjynb(int n,P x,int &nm,P *jn,P *yn,
	P *jnp,P *ynp)
{
	//coefficients of the 4-term asymptotic expansions (P0, Q0, P1, Q1)
	static P a[] = {
		-0.7031250000000000e-1,
		0.1121520996093750,
		-0.5725014209747314,
		6.074042001273483};
	static P b[] = {
		0.7324218750000000e-1,
		-0.2271080017089844,
		1.727727502584457,
		-2.438052969955606e1};
	static P a1[] = {
		0.1171875,
		-0.1441955566406250,
		0.6765925884246826,
		-6.883914268109947};
	static P b1[] = {
		-0.1025390625,
		0.2775764465332031,
		-1.993531733751297,
		2.724882731126854e1};

	int k;
	P by0, by1;			//Y0 and Y1, later reused as the running pair of the Yn recurrence

	nm = n;
	if ((x < 0.0) || (n < 0)) return 1;

	//limiting values at the origin (1e308 stands in for infinity)
	if (x < 1e-15) {
		for (k=0;k<=n;k++) {
			jn[k] = 0.0;
			yn[k] = -1e308;
			jnp[k] = 0.0;
			ynp[k] = 1e308;
		}
		jn[0] = 1.0;
		jnp[1] = 0.5;
		return 0;
	}

	if (x <= 300.0 || n > (int)(0.9*x)) {
		//backward recurrence, accumulating the sums needed for normalization and Y0/Y1
		if (n == 0) nm = 1;
		int m = msta1(x,200);
		if (m < nm) nm = m;
		else m = msta2(x,nm,15);

		P bs = 0.0;
		P su = 0.0;
		P sv = 0.0;
		P f = 0.0;
		P f2 = 0.0;
		P f1 = 1.0e-100;
		for (k=m;k>=0;k--) {
			f = 2.0*(k+1.0)/x*f1 - f2;
			if (k <= nm) jn[k] = f;
			if ((k % 2 == 0) && (k != 0)) {
				//even, non-zero order; the sign factor equals (-1)^(k/2)
				bs += 2.0*f;
				su += (-1)*((k & 2)-1)*f/(P)k;
			}
			else if (k > 1) {
				//odd order greater than one
				sv += (-1)*((k & 2)-1)*(P)k*f/(k*k-1.0);
			}
			f2 = f1;
			f1 = f;
		}
		P s0 = bs+f;
		for (k=0;k<=nm;k++) {
			jn[k] /= s0;
		}
		P ec = log(0.5*x) +0.5772156649015329;
		by0 = M_2_PI*(ec*jn[0]-4.0*su/s0);
		yn[0] = by0;
		by1 = M_2_PI*((ec-1.0)*jn[1]-jn[0]/x-4.0*sv/s0);
		yn[1] = by1;
	}
	else {
		//asymptotic expansion for orders 0 and 1
		P t1 = x-M_PI_4;
		P p0 = 1.0;
		P q0 = -0.125/x;
		for (k=0;k<4;k++) {
			p0 += a[k]*pow(x,-2*k-2);
			q0 += b[k]*pow(x,-2*k-3);
		}
		P cu = sqrt(M_2_PI/x);
		P bj0 = cu*(p0*cos(t1)-q0*sin(t1));
		by0 = cu*(p0*sin(t1)+q0*cos(t1));
		jn[0] = bj0;
		yn[0] = by0;

		P t2 = x-0.75*M_PI;
		P p1 = 1.0;
		P q1 = 0.375/x;
		for (k=0;k<4;k++) {
			p1 += a1[k]*pow(x,-2*k-2);
			q1 += b1[k]*pow(x,-2*k-3);
		}
		P bj1 = cu*(p1*cos(t2)-q1*sin(t2));
		by1 = cu*(p1*sin(t2)+q1*cos(t2));
		jn[1] = bj1;
		yn[1] = by1;

		//forward recurrence for the higher orders of Jn
		for (k=2;k<=nm;k++) {
			P bjk = 2.0*(k-1.0)*bj1/x-bj0;
			jn[k] = bjk;
			bj0 = bj1;
			bj1 = bjk;
		}
	}

	//derivatives of Jn
	jnp[0] = -jn[1];
	for (k=1;k<=nm;k++) {
		jnp[k] = jn[k-1]-k*jn[k]/x;
	}

	//Yn by forward recurrence, then its derivatives
	for (k=2;k<=nm;k++) {
		P byk = 2.0*(k-1.0)*by1/x-by0;
		yn[k] = byk;
		by0 = by1;
		by1 = byk;
	}
	ynp[0] = -yn[1];
	for (k=1;k<=nm;k++) {
		ynp[k] = yn[k-1]-k*yn[k]/x;
	}
	return 0;

}
  583 +
  584 +// The following routine computes Bessel Jv(x) and Yv(x) for
  585 +// arbitrary positive order (v). For negative order, use:
  586 +//
  587 +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi)
  588 +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi)
  589 +//
  590 +template<typename P>
  591 +int bessjyv(P v,P x,P &vm,P *jv,P *yv,
  592 + P *djv,P *dyv)
  593 +{
  594 + P v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2;
  595 + P r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq;
  596 + P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk;
  597 + int j,k,l,m,n,kz;
  598 +
  599 + x2 = x*x;
  600 + n = (int)v;
  601 + v0 = v-n;
  602 + if ((x < 0.0) || (v < 0.0)) return 1;
  603 + if (x < 1e-15) {
  604 + for (k=0;k<=n;k++) {
  605 + jv[k] = 0.0;
  606 + yv[k] = -1e308;
  607 + djv[k] = 0.0;
  608 + dyv[k] = 1e308;
  609 + if (v0 == 0.0) {
  610 + jv[0] = 1.0;
  611 + djv[1] = 0.5;
  612 + }
  613 + else djv[0] = 1e308;
  614 + }
  615 + vm = v;
  616 + return 0;
  617 + }
  618 + if (x <= 12.0) {
  619 + for (l=0;l<2;l++) {
  620 + vl = v0 + l;
  621 + bjvl = 1.0;
  622 + r = 1.0;
  623 + for (k=1;k<=40;k++) {
  624 + r *= -0.25*x2/(k*(k+vl));
  625 + bjvl += r;
  626 + if (fabs(r) < fabs(bjvl)*1e-15) break;
  627 + }
  628 + vg = 1.0 + vl;
  629 + a = pow(0.5*x,vl)/gamma(vg);
  630 + if (l == 0) bjv0 = bjvl*a;
  631 + else bjv1 = bjvl*a;
  632 + }
  633 + }
  634 + else {
  635 + if (x >= 50.0) kz = 8;
  636 + else if (x >= 35.0) kz = 10;
  637 + else kz = 11;
  638 + for (j=0;j<2;j++) {
  639 + vv = 4.0*(j+v0)*(j+v0);
  640 + px = 1.0;
  641 + rp = 1.0;
  642 + for (k=1;k<=kz;k++) {
  643 + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))*
  644 + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2);
  645 + px += rp;
  646 + }
  647 + qx = 1.0;
  648 + rq = 1.0;
  649 + for (k=1;k<=kz;k++) {
  650 + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))*
  651 + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2);
  652 + qx += rq;
  653 + }
  654 + qx *= 0.125*(vv-1.0)/x;
  655 + xk = x-(0.5*(j+v0)+0.25)*M_PI;
  656 + a0 = sqrt(M_2_PI/x);
  657 + ck = cos(xk);
  658 + sk = sin(xk);
  659 +
  660 + if (j == 0) {
  661 + bjv0 = a0*(px*ck-qx*sk);
  662 + byv0 = a0*(px*sk+qx*ck);
  663 + }
  664 + else if (j == 1) {
  665 + bjv1 = a0*(px*ck-qx*sk);
  666 + byv1 = a0*(px*sk+qx*ck);
  667 + }
  668 + }
  669 + }
  670 + jv[0] = bjv0;
  671 + jv[1] = bjv1;
  672 + djv[0] = v0*jv[0]/x-jv[1];
  673 + djv[1] = -(1.0+v0)*jv[1]/x+jv[0];
  674 + if ((n >= 2) && (n <= (int)(0.9*x))) {
  675 + f0 = bjv0;
  676 + f1 = bjv1;
  677 + for (k=2;k<=n;k++) {
  678 + f = 2.0*(k+v0-1.0)*f1/x-f0;
  679 + jv[k] = f;
  680 + f0 = f1;
  681 + f1 = f;
  682 + }
  683 + }
  684 + else if (n >= 2) {
  685 + m = msta1(x,200);
  686 + if (m < n) n = m;
  687 + else m = msta2(x,n,15);
  688 + f2 = 0.0;
  689 + f1 = 1.0e-100;
  690 + for (k=m;k>=0;k--) {
  691 + f = 2.0*(v0+k+1.0)*f1/x-f2;
  692 + if (k <= n) jv[k] = f;
  693 + f2 = f1;
  694 + f1 = f;
  695 + }
  696 + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f;
  697 + else cs = bjv1/f2;
  698 + for (k=0;k<=n;k++) {
  699 + jv[k] *= cs;
  700 + }
  701 + }
  702 + for (k=2;k<=n;k++) {
  703 + djv[k] = -(k+v0)*jv[k]/x+jv[k-1];
  704 + }
  705 + if (x <= 12.0) {
  706 + if (v0 != 0.0) {
  707 + for (l=0;l<2;l++) {
  708 + vl = v0 +l;
  709 + bjvl = 1.0;
  710 + r = 1.0;
  711 + for (k=1;k<=40;k++) {
  712 + r *= -0.25*x2/(k*(k-vl));
  713 + bjvl += r;
  714 + if (fabs(r) < fabs(bjvl)*1e-15) break;
  715 + }
  716 + vg = 1.0-vl;
  717 + b = pow(2.0/x,vl)/gamma(vg);
  718 + if (l == 0) bju0 = bjvl*b;
  719 + else bju1 = bjvl*b;
  720 + }
  721 + pv0 = M_PI*v0;
  722 + pv1 = M_PI*(1.0+v0);
  723 + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0);
  724 + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1);
  725 + }
  726 + else {
  727 + ec = log(0.5*x)+el;
  728 + cs0 = 0.0;
  729 + w0 = 0.0;
  730 + r0 = 1.0;
  731 + for (k=1;k<=30;k++) {
  732 + w0 += 1.0/k;
  733 + r0 *= -0.25*x2/(k*k);
  734 + cs0 += r0*w0;
  735 + }
  736 + byv0 = M_2_PI*(ec*bjv0-cs0);
  737 + cs1 = 1.0;
  738 + w1 = 0.0;
  739 + r1 = 1.0;
  740 + for (k=1;k<=30;k++) {
  741 + w1 += 1.0/k;
  742 + r1 *= -0.25*x2/(k*(k+1));
  743 + cs1 += r1*(2.0*w1+1.0/(k+1.0));
  744 + }
  745 + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1);
  746 + }
  747 + }
  748 + yv[0] = byv0;
  749 + yv[1] = byv1;
  750 + for (k=2;k<=n;k++) {
  751 + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0;
  752 + yv[k] = byvk;
  753 + byv0 = byv1;
  754 + byv1 = byvk;
  755 + }
  756 + dyv[0] = v0*yv[0]/x-yv[1];
  757 + for (k=1;k<=n;k++) {
  758 + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1];
  759 + }
  760 + vm = n + v0;
  761 + return 0;
  762 +}
  763 +
  764 +template<typename P>
  765 +int bessjyv_sph(int v, P z, P &vm, P* cjv,
  766 + P* cyv, P* cjvp, P* cyvp)
  767 +{
  768 + //first, compute the bessel functions of fractional order
  769 + bessjyv<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  770 +
  771 + //iterate through each and scale
  772 + for(int n = 0; n<=v; n++)
  773 + {
  774 +
  775 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  776 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  777 +
  778 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  779 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
  780 + }
  781 +
  782 + return 0;
  783 +
  784 +}
  785 +
  786 +template<typename P>
  787 +int cbessjy01(complex<P> z,complex<P> &cj0,complex<P> &cj1,
  788 + complex<P> &cy0,complex<P> &cy1,complex<P> &cj0p,
  789 + complex<P> &cj1p,complex<P> &cy0p,complex<P> &cy1p)
  790 +{
  791 + complex<P> z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu;
  792 + P a0,w0,w1;
  793 + int k,kz;
  794 +
  795 + static P a[] = {
  796 + -7.03125e-2,
  797 + 0.112152099609375,
  798 + -0.5725014209747314,
  799 + 6.074042001273483,
  800 + -1.100171402692467e2,
  801 + 3.038090510922384e3,
  802 + -1.188384262567832e5,
  803 + 6.252951493434797e6,
  804 + -4.259392165047669e8,
  805 + 3.646840080706556e10,
  806 + -3.833534661393944e12,
  807 + 4.854014686852901e14,
  808 + -7.286857349377656e16,
  809 + 1.279721941975975e19};
  810 + static P b[] = {
  811 + 7.32421875e-2,
  812 + -0.2271080017089844,
  813 + 1.727727502584457,
  814 + -2.438052969955606e1,
  815 + 5.513358961220206e2,
  816 + -1.825775547429318e4,
  817 + 8.328593040162893e5,
  818 + -5.006958953198893e7,
  819 + 3.836255180230433e9,
  820 + -3.649010818849833e11,
  821 + 4.218971570284096e13,
  822 + -5.827244631566907e15,
  823 + 9.476288099260110e17,
  824 + -1.792162323051699e20};
  825 + static P a1[] = {
  826 + 0.1171875,
  827 + -0.1441955566406250,
  828 + 0.6765925884246826,
  829 + -6.883914268109947,
  830 + 1.215978918765359e2,
  831 + -3.302272294480852e3,
  832 + 1.276412726461746e5,
  833 + -6.656367718817688e6,
  834 + 4.502786003050393e8,
  835 + -3.833857520742790e10,
  836 + 4.011838599133198e12,
  837 + -5.060568503314727e14,
  838 + 7.572616461117958e16,
  839 + -1.326257285320556e19};
  840 + static P b1[] = {
  841 + -0.1025390625,
  842 + 0.2775764465332031,
  843 + -1.993531733751297,
  844 + 2.724882731126854e1,
  845 + -6.038440767050702e2,
  846 + 1.971837591223663e4,
  847 + -8.902978767070678e5,
  848 + 5.310411010968522e7,
  849 + -4.043620325107754e9,
  850 + 3.827011346598605e11,
  851 + -4.406481417852278e13,
  852 + 6.065091351222699e15,
  853 + -9.833883876590679e17,
  854 + 1.855045211579828e20};
  855 +
  856 + a0 = abs(z);
  857 + z2 = z*z;
  858 + z1 = z;
  859 + if (a0 == 0.0) {
  860 + cj0 = cone;
  861 + cj1 = czero;
  862 + cy0 = complex<P>(-1e308,0);
  863 + cy1 = complex<P>(-1e308,0);
  864 + cj0p = czero;
  865 + cj1p = complex<P>(0.5,0.0);
  866 + cy0p = complex<P>(1e308,0);
  867 + cy1p = complex<P>(1e308,0);
  868 + return 0;
  869 + }
  870 + if (real(z) < 0.0) z1 = -z;
  871 + if (a0 <= 12.0) {
  872 + cj0 = cone;
  873 + cr = cone;
  874 + for (k=1;k<=40;k++) {
  875 + cr *= -0.25*z2/(P)(k*k);
  876 + cj0 += cr;
  877 + if (abs(cr) < abs(cj0)*eps) break;
  878 + }
  879 + cj1 = cone;
  880 + cr = cone;
  881 + for (k=1;k<=40;k++) {
  882 + cr *= -0.25*z2/(k*(k+1.0));
  883 + cj1 += cr;
  884 + if (abs(cr) < abs(cj1)*eps) break;
  885 + }
  886 + cj1 *= 0.5*z1;
  887 + w0 = 0.0;
  888 + cr = cone;
  889 + cs = czero;
  890 + for (k=1;k<=40;k++) {
  891 + w0 += 1.0/k;
  892 + cr *= -0.25*z2/(P)(k*k);
  893 + cp = cr*w0;
  894 + cs += cp;
  895 + if (abs(cp) < abs(cs)*eps) break;
  896 + }
  897 + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs);
  898 + w1 = 0.0;
  899 + cr = cone;
  900 + cs = cone;
  901 + for (k=1;k<=40;k++) {
  902 + w1 += 1.0/k;
  903 + cr *= -0.25*z2/(k*(k+1.0));
  904 + cp = cr*(2.0*w1+1.0/(k+1.0));
  905 + cs += cp;
  906 + if (abs(cp) < abs(cs)*eps) break;
  907 + }
  908 + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs);
  909 + }
  910 + else {
  911 + if (a0 >= 50.0) kz = 8; // can be changed to 10
  912 + else if (a0 >= 35.0) kz = 10; // " " " 12
  913 + else kz = 12; // " " " 14
  914 + ct1 = z1 - M_PI_4;
  915 + cp0 = cone;
  916 + for (k=0;k<kz;k++) {
  917 + cp0 += a[k]*pow(z1,-2.0*k-2.0);
  918 + }
  919 + cq0 = -0.125/z1;
  920 + for (k=0;k<kz;k++) {
  921 + cq0 += b[k]*pow(z1,-2.0*k-3.0);
  922 + }
  923 + cu = sqrt(M_2_PI/z1);
  924 + cj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1));
  925 + cy0 = cu*(cp0*sin(ct1)+cq0*cos(ct1));
  926 + ct2 = z1 - 0.75*M_PI;
  927 + cp1 = cone;
  928 + for (k=0;k<kz;k++) {
  929 + cp1 += a1[k]*pow(z1,-2.0*k-2.0);
  930 + }
  931 + cq1 = 0.375/z1;
  932 + for (k=0;k<kz;k++) {
  933 + cq1 += b1[k]*pow(z1,-2.0*k-3.0);
  934 + }
  935 + cj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2));
  936 + cy1 = cu*(cp1*sin(ct2)+cq1*cos(ct2));
  937 + }
  938 + if (real(z) < 0.0) {
  939 + if (imag(z) < 0.0) {
  940 + cy0 -= 2.0*cii*cj0;
  941 + cy1 = -(cy1-2.0*cii*cj1);
  942 + }
  943 + else if (imag(z) > 0.0) {
  944 + cy0 += 2.0*cii*cj0;
  945 + cy1 = -(cy1+2.0*cii*cj1);
  946 + }
  947 + cj1 = -cj1;
  948 + }
  949 + cj0p = -cj1;
  950 + cj1p = cj0-cj1/z;
  951 + cy0p = -cy1;
  952 + cy1p = cy0-cy1/z;
  953 + return 0;
  954 +}
  955 +
  956 +template<typename P>
  957 +int cbessjyna(int n,complex<P> z,int &nm,complex<P> *cj,
  958 + complex<P> *cy,complex<P> *cjp,complex<P> *cyp)
  959 +{
  960 + complex<P> cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2;
  961 + complex<P> cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22;
  962 + complex<P> ch0,ch1,ch2;
  963 + P a0,yak,ya1,ya0,wa;
  964 + int m,k,lb,lb0;
  965 +
  966 + if (n < 0) return 1;
  967 + a0 = abs(z);
  968 + nm = n;
  969 + if (a0 < 1.0e-100) {
  970 + for (k=0;k<=n;k++) {
  971 + cj[k] = czero;
  972 + cy[k] = complex<P> (-1e308,0);
  973 + cjp[k] = czero;
  974 + cyp[k] = complex<P>(1e308,0);
  975 + }
  976 + cj[0] = cone;
  977 + cjp[1] = complex<P>(0.5,0.0);
  978 + return 0;
  979 + }
  980 + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]);
  981 + cbj0 = cj[0];
  982 + cbj1 = cj[1];
  983 + cby0 = cy[0];
  984 + cby1 = cy[1];
  985 + if (n <= 1) return 0;
  986 + if (n < (int)0.25*a0) {
  987 + cj0 = cbj0;
  988 + cj1 = cbj1;
  989 + for (k=2;k<=n;k++) {
  990 + cjk = 2.0*(k-1.0)*cj1/z-cj0;
  991 + cj[k] = cjk;
  992 + cj0 = cj1;
  993 + cj1 = cjk;
  994 + }
  995 + }
  996 + else {
  997 + m = msta1(a0,200);
  998 + if (m < n) nm = m;
  999 + else m = msta2(a0,n,15);
  1000 + cf2 = czero;
  1001 + cf1 = complex<P> (1.0e-100,0.0);
  1002 + for (k=m;k>=0;k--) {
  1003 + cf = 2.0*(k+1.0)*cf1/z-cf2;
  1004 + if (k <=nm) cj[k] = cf;
  1005 + cf2 = cf1;
  1006 + cf1 = cf;
  1007 + }
  1008 + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf;
  1009 + else cs = cbj1/cf2;
  1010 + for (k=0;k<=nm;k++) {
  1011 + cj[k] *= cs;
  1012 + }
  1013 + }
  1014 + for (k=2;k<=nm;k++) {
  1015 + cjp[k] = cj[k-1]-(P)k*cj[k]/z;
  1016 + }
  1017 + ya0 = abs(cby0);
  1018 + lb = 0;
  1019 + cg0 = cby0;
  1020 + cg1 = cby1;
  1021 + for (k=2;k<=nm;k++) {
  1022 + cyk = 2.0*(k-1.0)*cg1/z-cg0;
  1023 + yak = abs(cyk);
  1024 + ya1 = abs(cg0);
  1025 + if ((yak < ya0) && (yak < ya1)) lb = k;
  1026 + cy[k] = cyk;
  1027 + cg0 = cg1;
  1028 + cg1 = cyk;
  1029 + }
  1030 + lb0 = 0;
  1031 + if ((lb > 4) && (imag(z) != 0.0)) {
  1032 + while (lb != lb0) {
  1033 + ch2 = cone;
  1034 + ch1 = czero;
  1035 + lb0 = lb;
  1036 + for (k=lb;k>=1;k--) {
  1037 + ch0 = 2.0*k*ch1/z-ch2;
  1038 + ch2 = ch1;
  1039 + ch1 = ch0;
  1040 + }
  1041 + cp12 = ch0;
  1042 + cp22 = ch2;
  1043 + ch2 = czero;
  1044 + ch1 = cone;
  1045 + for (k=lb;k>=1;k--) {
  1046 + ch0 = 2.0*k*ch1/z-ch2;
  1047 + ch2 = ch1;
  1048 + ch1 = ch0;
  1049 + }
  1050 + cp11 = ch0;
  1051 + cp21 = ch2;
  1052 + if (lb == nm)
  1053 + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1];
  1054 + if (abs(cj[0]) > abs(cj[1])) {
  1055 + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0];
  1056 + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0];
  1057 + }
  1058 + else {
  1059 + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1];
  1060 + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1];
  1061 + }
  1062 + cyl2 = cy[lb+1];
  1063 + cyl1 = cy[lb];
  1064 + for (k=lb-1;k>=0;k--) {
  1065 + cylk = 2.0*(k+1.0)*cyl1/z-cyl2;
  1066 + cy[k] = cylk;
  1067 + cyl2 = cyl1;
  1068 + cyl1 = cylk;
  1069 + }
  1070 + cyl1 = cy[lb];
  1071 + cyl2 = cy[lb+1];
  1072 + for (k=lb+1;k<n;k++) {
  1073 + cylk = 2.0*k*cyl2/z-cyl1;
  1074 + cy[k+1] = cylk;
  1075 + cyl1 = cyl2;
  1076 + cyl2 = cylk;
  1077 + }
  1078 + for (k=2;k<=nm;k++) {
  1079 + wa = abs(cy[k]);
  1080 + if (wa < abs(cy[k-1])) lb = k;
  1081 + }
  1082 + }
  1083 + }
  1084 + for (k=2;k<=nm;k++) {
  1085 + cyp[k] = cy[k-1]-(P)k*cy[k]/z;
  1086 + }
  1087 + return 0;
  1088 +}
  1089 +
  // cbessjynb: fills cj[k], cy[k] (Bessel functions of the first and second kind of
  // integer order k and complex argument z) and cjp[k], cyp[k] (their derivatives)
  // for k = 0..nm.
  //
  //   n    highest order requested
  //   z    complex argument
  //   nm   output: highest order actually filled (starts as n; see below for when it changes)
  //   cj, cy, cjp, cyp   caller-allocated output arrays. Index 1 is always written, and
  //                      indices up to max(n, 1) may be written.
  //
  // Always returns 0.
  // NOTE(review): czero, cone, el, msta1 and msta2 are defined elsewhere in this file;
  // el is presumably Euler's constant - confirm against its definition.
  1090 +template<typename P>
  1091 +int cbessjynb(int n,complex<P> z,int &nm,complex<P> *cj,
  1092 + complex<P> *cy,complex<P> *cjp,complex<P> *cyp)
  1093 +{
  1094 + complex<P> cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce;
  1095 + complex<P> ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1;
  1096 + complex<P> cyy,cbjk,ct2;
  1097 + P a0,y0;
  1098 + int k,m;
  // Coefficient tables for the large-|z| branch below: a/b multiply z^(-2k-2) and
  // z^(-2k-3) in the order-0 terms cp0/cq0, a1/b1 do the same for the order-1 terms cp1/cq1.
  1099 + static P a[] = {
  1100 + -0.7031250000000000e-1,
  1101 + 0.1121520996093750,
  1102 + -0.5725014209747314,
  1103 + 6.074042001273483};
  1104 + static P b[] = {
  1105 + 0.7324218750000000e-1,
  1106 + -0.2271080017089844,
  1107 + 1.727727502584457,
  1108 + -2.438052969955606e1};
  1109 + static P a1[] = {
  1110 + 0.1171875,
  1111 + -0.1441955566406250,
  1112 + 0.6765925884246826,
  1113 + -6.883914268109947};
  1114 + static P b1[] = {
  1115 + -0.1025390625,
  1116 + 0.2775764465332031,
  1117 + -1.993531733751297,
  1118 + 2.724882731126854e1};
  1119 +
  1120 + y0 = abs(imag(z));
  1121 + a0 = abs(z);
  1122 + nm = n;
  // |z| is effectively zero: write limiting values directly and stop. +/-1e308 stands in
  // for the divergent entries. cjp[1] is written even when n == 0.
  1123 + if (a0 < 1.0e-100) {
  1124 + for (k=0;k<=n;k++) {
  1125 + cj[k] = czero;
  1126 + cy[k] = complex<P> (-1e308,0);
  1127 + cjp[k] = czero;
  1128 + cyp[k] = complex<P>(1e308,0);
  1129 + }
  1130 + cj[0] = cone;
  1131 + cjp[1] = complex<P>(0.5,0.0);
  1132 + return 0;
  1133 + }
  // Branch 1 (|z| <= 300, or n larger than |z|/4): backward recurrence for cj.
  1134 + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) {
  // at least orders 0 and 1 are produced because cy[1] is computed from cj[1] below
  1135 + if (n == 0) nm = 1;
  // msta1/msta2 choose the starting index m of the recurrence; if msta1 returns a value
  // below nm, nm itself is lowered to it (fewer orders than requested are returned).
  1136 + m = msta1(a0,200);
  1137 + if (m < nm) nm = m;
  1138 + else m = msta2(a0,nm,15);
  1139 + cbs = czero;
  1140 + csu = czero;
  1141 + csv = czero;
  1142 + cf2 = czero;
  // arbitrary tiny seed; the sequence is rescaled by cs0 after the loop
  1143 + cf1 = complex<P> (1.0e-100,0.0);
  1144 + for (k=m;k>=0;k--) {
  1145 + cf = 2.0*(k+1.0)*cf1/z-cf2;
  1146 + if (k <= nm) cj[k] = cf;
  // even k != 0: accumulate the normalisation sum cbs and the cy[0] sum csu.
  // (-1)*((k & 2)-1) is +1 when bit 1 of k is clear and -1 when it is set.
  1147 + if (((k & 1) == 0) && (k != 0)) {
  1148 + if (y0 <= 1.0) {
  1149 + cbs += 2.0*cf;
  1150 + }
  1151 + else {
  1152 + cbs += (-1)*((k & 2)-1)*2.0*cf;
  1153 + }
  1154 + csu += (P)((-1)*((k & 2)-1))*cf/(P)k;
  1155 + }
  // odd k > 1: accumulate the cy[1] sum csv
  1156 + else if (k > 1) {
  1157 + csv += (P)((-1)*((k & 2)-1)*k)*cf/(P)(k*k-1.0);
  1158 + }
  1159 + cf2 = cf1;
  1160 + cf1 = cf;
  1161 + }
  // cf now holds the k = 0 term; cs0 is the factor that normalises the whole sequence
  1162 + if (y0 <= 1.0) cs0 = cbs+cf;
  1163 + else cs0 = (cbs+cf)/cos(z);
  1164 + for (k=0;k<=nm;k++) {
  1165 + cj[k] /= cs0;
  1166 + }
  1167 + ce = log(0.5*z)+el;
  1168 + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0);
  1169 + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0);
  1170 + }
  // Branch 2 (|z| > 300 and n <= |z|/4): orders 0 and 1 from the coefficient tables and
  // cos/sin of (z - pi/4) and (z - 3*pi/4), then forward recurrence for cj[2..n].
  1171 + else {
  1172 + ct1 = z-M_PI_4;
  1173 + cp0 = cone;
  1174 + for (k=0;k<4;k++) {
  1175 + cp0 += a[k]*pow(z,-2.0*k-2.0);
  1176 + }
  1177 + cq0 = -0.125/z;
  1178 + for (k=0;k<4;k++) {
  1179 + cq0 += b[k] *pow(z,-2.0*k-3.0);
  1180 + }
  1181 + cu = sqrt(M_2_PI/z);
  1182 + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1));
  1183 + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1));
  1184 + cj[0] = cbj0;
  1185 + cy[0] = cby0;
  1186 + ct2 = z-0.75*M_PI;
  1187 + cp1 = cone;
  1188 + for (k=0;k<4;k++) {
  1189 + cp1 += a1[k]*pow(z,-2.0*k-2.0);
  1190 + }
  1191 + cq1 = 0.375/z;
  1192 + for (k=0;k<4;k++) {
  1193 + cq1 += b1[k]*pow(z,-2.0*k-3.0);
  1194 + }
  1195 + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2));
  1196 + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2));
  1197 + cj[1] = cbj1;
  1198 + cy[1] = cby1;
  1199 + for (k=2;k<=n;k++) {
  1200 + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0;
  1201 + cj[k] = cbjk;
  1202 + cbj0 = cbj1;
  1203 + cbj1 = cbjk;
  1204 + }
  1205 + }
  // Derivatives of cj: cjp[0] = -cj[1], cjp[k] = cj[k-1] - k*cj[k]/z
  1206 + cjp[0] = -cj[1];
  1207 + for (k=1;k<=nm;k++) {
  1208 + cjp[k] = cj[k-1]-(P)k*cj[k]/z;
  1209 + }
  // Remaining cy values are derived from cj via the relation
  //   cj[k]*cy[k-1] - cj[k-1]*cy[k] = 2/(pi*z)
  // (or its two-step form), dividing by whichever of cj[k-1], cj[k-2] is larger in magnitude.
  1210 + if (abs(cj[0]) > 1.0)
  1211 + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0];
  1212 + for (k=2;k<=nm;k++) {
  1213 + if (abs(cj[k-1]) >= abs(cj[k-2]))
  1214 + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1];
  1215 + else
  1216 + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2];
  1217 + cy[k] = cyy;
  1218 + }
  // Derivatives of cy, same form as for cj
  1219 + cyp[0] = -cy[1];
  1220 + for (k=1;k<=nm;k++) {
  1221 + cyp[k] = cy[k-1]-(P)k*cy[k]/z;
  1222 + }
  1223 +
  1224 + return 0;
  1225 +}
  1226 +
  1227 +template<typename P>
  1228 +int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv,
  1229 + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
  1230 +{
  1231 + complex<P> z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp;
  1232 + complex<P> cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb;
  1233 + complex<P> cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2;
  1234 + complex<P> cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22;
  1235 + complex<P> ch0,ch1,ch2,cyl1,cyl2,cylk;
  1236 +
  1237 + P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa;
  1238 + int j,n,k,kz,l,lb,lb0,m;
  1239 +
  1240 + a0 = abs(z);
  1241 + z1 = z;
  1242 + z2 = z*z;
  1243 + n = (int)v;
  1244 +
  1245 +
  1246 + v0 = v-n;
  1247 +
  1248 + pv0 = M_PI*v0;
  1249 + pv1 = M_PI*(1.0+v0);
  1250 + if (a0 < 1.0e-100) {
  1251 + for (k=0;k<=n;k++) {
  1252 + cjv[k] = czero;
  1253 + cyv[k] = complex<P> (-1e308,0);
  1254 + cjvp[k] = czero;
  1255 + cyvp[k] = complex<P> (1e308,0);
  1256 +
  1257 + }
  1258 + if (v0 == 0.0) {
  1259 + cjv[0] = cone;
  1260 + cjvp[1] = complex<P> (0.5,0.0);
  1261 + }
  1262 + else {
  1263 + cjvp[0] = complex<P> (1e308,0);
  1264 + }
  1265 + vm = v;
  1266 + return 0;
  1267 + }
  1268 + if (real(z1) < 0.0) z1 = -z;
  1269 + if (a0 <= 12.0) {
  1270 + for (l=0;l<2;l++) {
  1271 + vl = v0+l;
  1272 + cjvl = cone;
  1273 + cr = cone;
  1274 + for (k=1;k<=40;k++) {
  1275 + cr *= -0.25*z2/(k*(k+vl));
  1276 + cjvl += cr;
  1277 + if (abs(cr) < abs(cjvl)*eps) break;
  1278 + }
  1279 + vg = 1.0 + vl;
  1280 + ga = gamma(vg);
  1281 + ca = pow(0.5*z1,vl)/ga;
  1282 + if (l == 0) cjv0 = cjvl*ca;
  1283 + else cjv1 = cjvl*ca;
  1284 + }
  1285 + }
  1286 + else {
  1287 + if (a0 >= 50.0) kz = 8;
  1288 + else if (a0 >= 35.0) kz = 10;
  1289 + else kz = 11;
  1290 + for (j=0;j<2;j++) {
  1291 + vv = 4.0*(j+v0)*(j+v0);
  1292 + cpz = cone;
  1293 + crp = cone;
  1294 + for (k=1;k<=kz;k++) {
  1295 + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))*
  1296 + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2);
  1297 + cpz += crp;
  1298 + }
  1299 + cqz = cone;
  1300 + crq = cone;
  1301 + for (k=1;k<=kz;k++) {
  1302 + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))*
  1303 + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2);
  1304 + cqz += crq;
  1305 + }
  1306 + cqz *= 0.125*(vv-1.0)/z1;
  1307 + zk = z1-(0.5*(j+v0)+0.25)*M_PI;
  1308 + ca0 = sqrt(M_2_PI/z1);
  1309 + cck = cos(zk);
  1310 + csk = sin(zk);
  1311 + if (j == 0) {
  1312 + cjv0 = ca0*(cpz*cck-cqz*csk);
  1313 + cyv0 = ca0*(cpz*csk+cqz+cck);
  1314 + }
  1315 + else {
  1316 + cjv1 = ca0*(cpz*cck-cqz*csk);
  1317 + cyv1 = ca0*(cpz*csk+cqz*cck);
  1318 + }
  1319 + }
  1320 + }
  1321 + if (a0 <= 12.0) {
  1322 + if (v0 != 0.0) {
  1323 + for (l=0;l<2;l++) {
  1324 + vl = v0+l;
  1325 + cjvl = cone;
  1326 + cr = cone;
  1327 + for (k=1;k<=40;k++) {
  1328 + cr *= -0.25*z2/(k*(k-vl));
  1329 + cjvl += cr;
  1330 + if (abs(cr) < abs(cjvl)*eps) break;
  1331 + }
  1332 + vg = 1.0-vl;
  1333 + gb = gamma(vg);
  1334 + cb = pow(2.0/z1,vl)/gb;
  1335 + if (l == 0) cju0 = cjvl*cb;
  1336 + else cju1 = cjvl*cb;
  1337 + }
  1338 + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0);
  1339 + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1);
  1340 + }
  1341 + else {
  1342 + cec = log(0.5*z1)+el;
  1343 + cs0 = czero;
  1344 + w0 = 0.0;
  1345 + cr0 = cone;
  1346 + for (k=1;k<=30;k++) {
  1347 + w0 += 1.0/k;
  1348 + cr0 *= -0.25*z2/(P)(k*k);
  1349 + cs0 += cr0*w0;
  1350 + }
  1351 + cyv0 = M_2_PI*(cec*cjv0-cs0);
  1352 + cs1 = cone;
  1353 + w1 = 0.0;
  1354 + cr1 = cone;
  1355 + for (k=1;k<=30;k++) {
  1356 + w1 += 1.0/k;
  1357 + cr1 *= -0.25*z2/(k*(k+1.0));
  1358 + cs1 += cr1*(2.0*w1+1.0/(k+1.0));
  1359 + }
  1360 + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1);
  1361 + }
  1362 + }
  1363 + if (real(z) < 0.0) {
  1364 + cfac0 = exp(pv0*cii);
  1365 + cfac1 = exp(pv1*cii);
  1366 + if (imag(z) < 0.0) {
  1367 + cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
  1368 + cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
  1369 + cjv0 /= cfac0;
  1370 + cjv1 /= cfac1;
  1371 + }
  1372 + else if (imag(z) > 0.0) {
  1373 + cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0;
  1374 + cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1;
  1375 + cjv0 *= cfac0;
  1376 + cjv1 *= cfac1;
  1377 + }
  1378 + }
  1379 + cjv[0] = cjv0;
  1380 + cjv[1] = cjv1;
  1381 + if ((n >= 2) && (n <= (int)(0.25*a0))) {
  1382 + cf0 = cjv0;
  1383 + cf1 = cjv1;
  1384 + for (k=2;k<= n;k++) {
  1385 + cf = 2.0*(k+v0-1.0)*cf1/z-cf0;
  1386 + cjv[k] = cf;
  1387 + cf0 = cf1;
  1388 + cf1 = cf;
  1389 + }
  1390 + }
  1391 + else if (n >= 2) {
  1392 + m = msta1(a0,200);
  1393 + if (m < n) n = m;
  1394 + else m = msta2(a0,n,15);
  1395 + cf2 = czero;
  1396 + cf1 = complex<P>(1.0e-100,0.0);
  1397 + for (k=m;k>=0;k--) {
  1398 + cf = 2.0*(v0+k+1.0)*cf1/z-cf2;
  1399 + if (k <= n) cjv[k] = cf;
  1400 + cf2 = cf1;
  1401 + cf1 = cf;
  1402 + }
  1403 + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf;
  1404 + else cs = cjv1/cf2;
  1405 + for (k=0;k<=n;k++) {
  1406 + cjv[k] *= cs;
  1407 + }
  1408 + }
  1409 + cjvp[0] = v0*cjv[0]/z-cjv[1];
  1410 + for (k=1;k<=n;k++) {
  1411 + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1];
  1412 + }
  1413 + cyv[0] = cyv0;
  1414 + cyv[1] = cyv1;
  1415 + ya0 = abs(cyv0);
  1416 + lb = 0;
  1417 + cg0 = cyv0;
  1418 + cg1 = cyv1;
  1419 + for (k=2;k<=n;k++) {
  1420 + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0;
  1421 + yak = abs(cyk);
  1422 + ya1 = abs(cg0);
  1423 + if ((yak < ya0) && (yak< ya1)) lb = k;
  1424 + cyv[k] = cyk;
  1425 + cg0 = cg1;
  1426 + cg1 = cyk;
  1427 + }
  1428 + lb0 = 0;
  1429 + if ((lb > 4) && (imag(z) != 0.0)) {
  1430 + while(lb != lb0) {
  1431 + ch2 = cone;
  1432 + ch1 = czero;
  1433 + lb0 = lb;
  1434 + for (k=lb;k>=1;k--) {
  1435 + ch0 = 2.0*(k+v0)*ch1/z-ch2;
  1436 + ch2 = ch1;
  1437 + ch1 = ch0;
  1438 + }
  1439 + cp12 = ch0;
  1440 + cp22 = ch2;
  1441 + ch2 = czero;
  1442 + ch1 = cone;
  1443 + for (k=lb;k>=1;k--) {
  1444 + ch0 = 2.0*(k+v0)*ch1/z-ch2;
  1445 + ch2 = ch1;
  1446 + ch1 = ch0;
  1447 + }
  1448 + cp11 = ch0;
  1449 + cp21 = ch2;
  1450 + if (lb == n)
  1451 + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1];
  1452 + if (abs(cjv[0]) > abs(cjv[1])) {
  1453 + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0];
  1454 + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0];
  1455 + }
  1456 + else {
  1457 + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1];
  1458 + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1];
  1459 + }
  1460 + cyl2 = cyv[lb+1];
  1461 + cyl1 = cyv[lb];
  1462 + for (k=lb-1;k>=0;k--) {
  1463 + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2;
  1464 + cyv[k] = cylk;
  1465 + cyl2 = cyl1;
  1466 + cyl1 = cylk;
  1467 + }
  1468 + cyl1 = cyv[lb];
  1469 + cyl2 = cyv[lb+1];
  1470 + for (k=lb+1;k<n;k++) {
  1471 + cylk = 2.0*(k+v0)*cyl2/z-cyl1;
  1472 + cyv[k+1] = cylk;
  1473 + cyl1 = cyl2;
  1474 + cyl2 = cylk;
  1475 + }
  1476 + for (k=2;k<=n;k++) {
  1477 + wa = abs(cyv[k]);
  1478 + if (wa < abs(cyv[k-1])) lb = k;
  1479 + }
  1480 + }
  1481 + }
  1482 + cyvp[0] = v0*cyv[0]/z-cyv[1];
  1483 + for (k=1;k<=n;k++) {
  1484 + cyvp[k] = cyv[k-1]-(k+v0)*cyv[k]/z;
  1485 + }
  1486 + vm = n+v0;
  1487 + return 0;
  1488 +}
  1489 +
  1490 +template<typename P>
  1491 +int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
  1492 + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
  1493 +{
  1494 + //first, compute the bessel functions of fractional order
  1495 + cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp);
  1496 +
  1497 + //iterate through each and scale
  1498 + for(int n = 0; n<=v; n++)
  1499 + {
  1500 +
  1501 + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0));
  1502 + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0));
  1503 +
  1504 + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0));
  1505 + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0));
  1506 + }
  1507 +
  1508 + return 0;
  1509 +
  1510 +}
  1511 +
  1512 +} //end namespace rts
  1513 +
  1514 +
  1515 +#endif
... ...
stim/math/filters/gauss3.h
... ... @@ -13,7 +13,7 @@ namespace stim
13 13 ///@param dimx is the size of in* in the z direction.
14 14 ///@param stdx is the standard deviation (in pixels) along the x axis.
15 15 ///@param stdy is the standard deviation (in pixels) along the y axis.
16   - ///@param nstds specifies the number of standard deviations of the Gaussian that will be k ept in the kernel.
  16 + ///@param nstds specifies the number of standard deviations of the Gaussian that will be kept in the kernel.
17 17 template<typename T, typename K>
18 18 void cpu_gauss3(T* in, K dimx, K dimy, K dimz, K stdx, K stdy, K stdz, size_t nstds = 3)
19 19 {
... ...
stim/math/matrix.h
... ... @@ -37,6 +37,20 @@ struct matrix
37 37 return *this;
38 38 }
39 39  
  40 + //create a symmetric matrix given the rhs values, given in column-major order
  41 + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){
  42 + const size_t L = (N*N+N)/2; //store the number of values
  43 +
  44 + size_t r, c;
  45 + r = c = 0;
  46 + for(size_t i = 0; i < L; i++){ //for each value
  47 + if(r == c) M[c * N + r] = rhs[i];
  48 + else M[c*N + r] = M[r * N + c] = rhs[i];
  49 + r++;
  50 + if(r == N) r = ++c;
  51 + }
  52 + }
  53 +
40 54 CUDA_CALLABLE T& operator()(int row, int col)
41 55 {
42 56 return M[col * N + row];
... ... @@ -91,6 +105,14 @@ struct matrix
91 105  
92 106 return ss.str();
93 107 }
  108 +
  109 + static matrix<T, N> identity() {
  110 + matrix<T, N> I;
  111 + I = 0;
  112 + for (size_t i = 0; i < N; i++)
  113 + I.M[i * N + i] = 1;
  114 + return I;
  115 + }
94 116 };
95 117  
96 118 } //end namespace rts
... ...
stim/math/matrix_sym.h 0 → 100644
  1 +#ifndef STIM_MATRIX_SYM_H
  2 +#define STIM_MATRIX_SYM_H
  3 +
  4 +#include <stim/cuda/cudatools/callable.h>
  5 +#include <stim/math/matrix.h>
  6 +
  7 +/* This class represents a symmetric D x D matrix (a symmetric rank-2, D-dimensional tensor),
  8 +suitable for representing tensor fields such as structure and diffusion tensors
  9 +*/
  10 +namespace stim{
  11 +
  12 +template <typename T, int D>
  13 +class matrix_sym{
  14 +
  15 +protected:
  16 + //values are stored in column-major order as a lower-triangular matrix
  17 + T M[D*(D + 1)/2];
  18 +
  19 + static size_t idx(size_t r, size_t c) {
  20 + //if the index is in the upper-triangular portion, swap the indices
  21 + if(r < c){
  22 + size_t t = r;
  23 + r = c;
  24 + c = t;
  25 + }
  26 +
  27 + size_t ci = (c + 1) * (D + (D - c))/2 - 1; //index to the end of column c
  28 + size_t i = ci - (D - r - 1);
  29 + return i;
  30 + }
  31 +
  32 + //calculate the row and column given an index
  33 + //static void indices(size_t& r, size_t& c, size_t idx) {
  34 + // size_t col = 0;
  35 + // for ( ; col < D; col++)
  36 + // if(idx <= ((D - col + D) * (col + 1)/2 - 1))
  37 + // break;
  38 +
  39 + // c = col;
  40 + // size_t ci = (D - (col - 1) + D) * col / 2 - 1; //index to the end of last column col -1
  41 + // r = idx - ci + c - 1;
  42 + //}
  43 + static void indices(size_t& r, size_t& c, size_t idx) {
  44 + size_t cf = -1/2 * sqrt(4 * D * D + 4 * D - (7 + 8 * idx)) + D - 1/2;
  45 + c = ceil(cf);
  46 + r = idx - D * c + c * (c + 1) / 2;
  47 + }
  48 +
  49 +public:
  50 + //return the symmetric matrix associated with this tensor
  51 + stim::matrix<T, D> mat() {
  52 + stim::matrix<T, D> r;
  53 + r.setsym(M);
  54 + return r;
  55 + }
  56 +
  57 + CUDA_CALLABLE T& operator()(int r, int c) {
  58 + return M[idx(r, c)];
  59 + }
  60 +
  61 + CUDA_CALLABLE matrix_sym<T, D> operator=(T rhs) {
  62 + int Nsq = D*(D+1)/2;
  63 + for(int i=0; i<Nsq; i++)
  64 + M[i] = rhs;
  65 +
  66 + return *this;
  67 + }
  68 +
  69 + CUDA_CALLABLE matrix_sym<T, D> operator=(matrix_sym<T, D> rhs) {
  70 + size_t N = D * (D + 1) / 2;
  71 + for (size_t i = 0; i < N; i++) M[i] = rhs.M[i];
  72 + return *this;
  73 + }
  74 +
  75 + CUDA_CALLABLE T trace() {
  76 + T tr = 0;
  77 + for (size_t i = 0; i < D; i++) //for each diagonal value
  78 + tr += M[idx(i, i)]; //add the value on the diagonal
  79 + return tr;
  80 + }
  81 + // overload matrix multiply scalar
  82 + CUDA_CALLABLE void operator_product(matrix_sym<T, D> &B, T rhs) {
  83 + int Nsq = D*(D+1)/2;
  84 + for(int i=0; i<Nsq; i++)
  85 + B.M[i] *= rhs;
  86 + }
  87 +
  88 + //return the tensor as a string
  89 + std::string str() {
  90 + std::stringstream ss;
  91 + for(int r = 0; r < D; r++){
  92 + ss << "| ";
  93 + for(int c=0; c<D; c++)
  94 + {
  95 + ss << (*this)(r, c) << " ";
  96 + }
  97 + ss << "|" << std::endl;
  98 + }
  99 +
  100 + return ss.str();
  101 + }
  102 +
  103 + //returns an identity matrix
  104 + static matrix_sym<T, D> identity() {
  105 + matrix_sym<T, D> I;
  106 + I = 0;
  107 + for (size_t i = 0; i < D; i++)
  108 + I.M[matrix_sym<T, D>::idx(i, i)] = 1;
  109 + return I;
  110 + }
  111 +};
  112 +
  113 +
  114 +
  115 +} //end namespace stim
  116 +
  117 +
  118 +#endif
... ...
stim/math/tensor2.h 0 → 100644
  1 +#ifndef STIM_TENSOR2_H
  2 +#define STIM_TENSOR2_H
  3 +
  4 +#include "matrix_sym.h"
  5 +
  6 +namespace stim {
  7 +
  8 +/*This class represents a symmetric rank-2 2D tensor, useful for structure tensors
  9 +*/
  10 +template<typename T>
  11 +class tensor2 : public matrix_sym<T, 2> {
  12 +
  13 +protected:
  14 +
  15 +public:
  16 +
  17 + //calculate the eigenvectors and eigenvalues of the tensor
  18 + CUDA_CALLABLE void eig(stim::matrix<T, 2>& v, stim::matrix<T, 2>& lambda) {
  19 +
  20 + lambda = 0; //initialize the eigenvalue matrix to zero
  21 +
  22 + T t = M[0] + M[2]; //calculate the trace of the tensor
  23 + T d = M[0] * M[2] - M[1] * M[1]; //calculate the determinant of the tensor
  24 +
  25 + lambda(0, 0) = t / 2 + sqrt(t*t / 4 - d);
  26 + lambda(1, 1) = t / 2 - sqrt(t*t / 4 - d);
  27 +
  28 + if (M[1] == 0) {
  29 + v = stim::matrix<T, 2>::identity();
  30 + }
  31 + else {
  32 + v(0, 0) = lambda(0, 0) - d;
  33 + v(0, 1) = lambda(1, 1) - d;
  34 + v(1, 0) = v(1, 1) = M[1];
  35 + }
  36 + }
  37 +
  38 + CUDA_CALLABLE tensor2<T> operator=(stim::matrix_sym<T, 2> rhs){
  39 + stim::matrix_sym<T, 2>::operator=(rhs);
  40 + return *this;
  41 + }
  42 +};
  43 +
  44 +
  45 +} //end namespace stim
  46 +
  47 +
  48 +#endif
0 49 \ No newline at end of file
... ...
stim/math/tensor3.h 0 → 100644
  1 +#ifndef STIM_TENSOR3_H
  2 +#define STIM_TENSOR3_H
  3 +
  4 +#include "matrix_sym.h"
  5 +#include <stim/math/constants.h>
  6 +
  7 +namespace stim {
  8 +
  9 + /*This class represents a symmetric rank-2 3D tensor, useful for structure tensors
  10 + */
  11 +
  12 + //Matrix ID cheat sheet
  13 + // | 0 1 2 |
  14 + // | 1 3 4 |
  15 + // | 2 4 5 |
  16 + template<typename T>
  17 + class tensor3 : public matrix_sym<T, 3> {
  18 +
  19 + protected:
  20 +
  21 + public:
  22 +
  23 + //calculates the determinant of the tensor
  24 + CUDA_CALLABLE T det() {
  25 + return M[0] * M[3] * M[5] + 2 * (M[1] * M[4] * M[2]) - M[2] * M[3] * M[2] - M[1] * M[1] * M[5] - M[0] * M[4] * M[4];
  26 + }
  27 +
  28 + //calculate the eigenvalues for the tensor
  29 + //adapted from https://en.wikipedia.org/wiki/Eigenvalue_algorithm
  30 +
  31 + CUDA_CALLABLE stim::vec3<T> lambda() {
  32 + stim::vec3<T> lam;
  33 + T p1 = M[1] * M[1] + M[2] * M[2] + M[4] * M[4]; //calculate the sum of the squared off-diagonal values
  34 + if (p1 == 0) { //if this value is zero, the matrix is diagonal
  35 + lam[0] = M[0]; //the eigenvalues are the diagonal values
  36 + lam[1] = M[3];
  37 + lam[2] = M[5];
  38 + return lam; //return the eigenvalue vector
  39 + }
  40 +
  41 + T tr = matrix_sym<T, 3>::trace(); //calculate the trace of the matrix
  42 + T q = tr / 3;
  43 + T p2 = (M[0] - q) * (M[0] - q) + (M[3] - q) * (M[3] - q) + (M[5] - q) * (M[5] - q) + 2 * p1;
  44 + T p = sqrt(p2 / 6);
  45 + tensor3<T> Q; //allocate space for Q (q along the diagonals)
  46 + Q = (T)0; //initialize Q to zeros
  47 + Q(0, 0) = Q(1, 1) = Q(2, 2) = q; //set the diagonal values to q
  48 + tensor3<T> B = *this; // B1 = A
  49 + B.M[0] = (B.M[0] - q);
  50 + B.M[3] = (B.M[3] - q);
  51 + B.M[5] = (B.M[5] - q);
  52 + matrix_sym<T, 3>::operator_product(B, 1/p); // B = (1/p) * (A - q*I)
  53 + //B.M[0] = B.M[0] * 1/p;
  54 + //B.M[1] = B.M[1] * 1/p;
  55 + //B.M[2] = B.M[2] * 1/p;
  56 + //B.M[3] = B.M[3] * 1/p;
  57 + //B.M[4] = B.M[4] * 1/p;
  58 + //B.M[5] = B.M[5] * 1/p;
  59 + T r = B.det() / 2; //calculate det(B) / 2
  60 +
  61 + // In exact arithmetic for a symmetric matrix - 1 <= r <= 1
  62 + // but computation error can leave it slightly outside this range.
  63 + T phi;
  64 + if (r <= -1) phi = stim::PI / 3;
  65 + else if (r >= 1) phi = 0;
  66 + else phi = acos(r) / 3;
  67 +
  68 + // the eigenvalues satisfy eig3 >= eig2 >= eig1
  69 + lam[2] = q + 2 * p * cos(phi);
  70 + lam[0] = q + 2 * p * cos(phi + (2 * stim::PI / 3));
  71 + lam[1] = 3 * q - (lam[2] + lam[0]);
  72 +
  73 + return lam;
  74 + }
  75 +
  76 + CUDA_CALLABLE stim::matrix<T, 3> eig(stim::vec3<T>& lambda = stim::vec3<T>()) {
  77 + stim::matrix<T, 3> V;
  78 +
  79 + stim::matrix<T, 3> M1 = matrix_sym<T, 3>::mat();
  80 + stim::matrix<T, 3> M2 = matrix_sym<T, 3>::mat();
  81 + stim::matrix<T, 3> M3 = matrix_sym<T, 3>::mat(); // fill a tensor with symmetric values
  82 +
  83 + M1.operator_minus(M1, lambda[0]); // M1 = A - lambda[0] * I
  84 +
  85 + M2.operator_minus(M2, lambda[1]); // M2 = A - lambda[1] * I
  86 +
  87 + M3.operator_minus(M3, lambda[2]); // M3 = A - lambda[2] * I
  88 +
  89 + T Mod = 0; // module of one column
  90 +
  91 + T tmp1[9] = {0};
  92 + for(int i = 0; i < 9; i++) {
  93 + for(int j = 0; j < 3; j++){
  94 + tmp1[i] += M2(i%3, j) * M3(j, i/3);
  95 + }
  96 + }
  97 + if(tmp1[0] * tmp1[1] * tmp1[2] != 0) { // test whether it is zero column
  98 + Mod = sqrt(pow(tmp1[0],2) + pow(tmp1[1],2) + pow(tmp1[2],2));
  99 + V(0, 0) = tmp1[0]/Mod;
  100 + V(1, 0) = tmp1[1]/Mod;
  101 + V(2, 0) = tmp1[2]/Mod;
  102 + }
  103 + else {
  104 + Mod = sqrt(pow(tmp1[3],2) + pow(tmp1[4],2) + pow(tmp1[5],2));
  105 + V(0, 0) = tmp1[3]/Mod;
  106 + V(1, 0) = tmp1[4]/Mod;
  107 + V(2, 0) = tmp1[5]/Mod;
  108 + }
  109 +
  110 + T tmp2[9] = {0};
  111 + for(int i = 0; i < 9; i++) {
  112 + for(int j = 0; j < 3; j++){
  113 + tmp2[i] += M1(i%3, j) * M3(j, i/3);
  114 + }
  115 + }
  116 + if(tmp2[0] * tmp2[1] * tmp2[2] != 0) {
  117 + Mod = sqrt(pow(tmp2[0],2) + pow(tmp2[1],2) + pow(tmp2[2],2));
  118 + V(0, 1) = tmp2[0]/Mod;
  119 + V(1, 1) = tmp2[1]/Mod;
  120 + V(2, 1) = tmp2[2]/Mod;
  121 + }
  122 + else {
  123 + Mod = sqrt(pow(tmp2[3],2) + pow(tmp2[4],2) + pow(tmp2[5],2));
  124 + V(0, 1) = tmp2[3]/Mod;
  125 + V(1, 1) = tmp2[4]/Mod;
  126 + V(2, 1) = tmp2[5]/Mod;
  127 + }
  128 +
  129 + T tmp3[9] = {0};
  130 + for(int i = 0; i < 9; i++) {
  131 + for(int j = 0; j < 3; j++){
  132 + tmp3[i] += M1(i%3, j) * M2(j, i/3);
  133 + }
  134 + }
  135 + if(tmp3[0] * tmp3[1] * tmp3[2] != 0) {
  136 + Mod = sqrt(pow(tmp3[0],2) + pow(tmp3[1],2) + pow(tmp3[2],2));
  137 + V(0, 2) = tmp3[0]/Mod;
  138 + V(1, 2) = tmp3[1]/Mod;
  139 + V(2, 2) = tmp3[2]/Mod;
  140 + }
  141 + else {
  142 + Mod = sqrt(pow(tmp3[3],2) + pow(tmp3[4],2) + pow(tmp3[5],2));
  143 + V(0, 2) = tmp3[3]/Mod;
  144 + V(1, 2) = tmp3[4]/Mod;
  145 + V(2, 2) = tmp3[5]/Mod;
  146 + }
  147 + return V; //return the eigenvector matrix
  148 + }
  149 + // return one specific eigenvector
  150 + CUDA_CALLABLE stim::vec3<T> eig(int n, stim::vec3<T>& lambda = stim::vec3<T>()) {
  151 + stim::matrix<T, 3> V = eig(lambda);
  152 + stim::vec3<T> v;
  153 + for(int i = 0; i < 3; i++)
  154 + v[i] = V(i, n);
  155 + return v;
  156 + }
  157 +
  158 +
  159 + CUDA_CALLABLE T linear(stim::vec3<T>& lambda = stim::vec3<T>()) {
  160 + T cl = (lambda[2] - lambda[1]) / (lambda[0] + lambda[1] + lambda[2]);
  161 + return cl;
  162 + }
  163 +
  164 + CUDA_CALLABLE T Planar(stim::vec3<T>& lambda = stim::vec3<T>()) {
  165 + T cp = 2 * (lambda[1] - lambda[0]) / (lambda[0] + lambda[1] + lambda[2]);
  166 + return cp;
  167 + }
  168 +
  169 + CUDA_CALLABLE T spherical(stim::vec3<T>& lambda = stim::vec3<T>()) {
  170 + T cs = 3 * lambda[0] / (lambda[0] + lambda[1] + lambda[2]);
  171 + return cs;
  172 + }
  173 +
  174 + CUDA_CALLABLE T fa(stim::vec3<T>& lambda = stim::vec3<T>()) {
  175 + T fa = sqrt(1/2) * sqrt(pow(lambda[2] - lambda[1], 2) + pow(lambda[1] - lambda[0], 2) + pow(lambda[0] - lambda[2], 2)) / sqrt(pow(lambda[2], 2) + pow(lambda[1], 2) + pow(lambda[0], 2));
  176 + }
  177 + //JACK 2: write functions to calculate anisotropy
  178 + //ex: fa(), linear(), planar(), spherical()
  179 +
  180 +
  181 + //calculate the eigenvectors and eigenvalues of the tensor
  182 + //CUDA_CALLABLE void eig(stim::matrix<T, 3>& v, stim::matrix<T, 3>& lambda){
  183 +
  184 + //}
  185 + CUDA_CALLABLE tensor3<T> operator=(T rhs) {
  186 + stim::matrix_sym<T, 3>::operator=(rhs);
  187 + return *this;
  188 + }
  189 +
  190 + CUDA_CALLABLE tensor3<T> operator=(stim::matrix_sym<T, 3> rhs) {
  191 + stim::matrix_sym<T, 3>::operator=(rhs);
  192 + return *this;
  193 + }
  194 + };
  195 +
  196 +
  197 +} //end namespace stim
  198 +
  199 +
  200 +#endif
0 201 \ No newline at end of file
... ...
stim/math/vec3.h.orig 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +#include <cmath>
  7 +
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// A class designed to act as a 3D vector with CUDA compatibility
  13 +template<typename T>
  14 +class vec3{
  15 +
  16 +protected:
  17 + T ptr[3];
  18 +
  19 +public:
  20 +
  21 + CUDA_CALLABLE vec3(){}
  22 +
  23 + CUDA_CALLABLE vec3(T v){
  24 + ptr[0] = ptr[1] = ptr[2] = v;
  25 + }
  26 +
  27 + CUDA_CALLABLE vec3(T x, T y, T z){
  28 + ptr[0] = x;
  29 + ptr[1] = y;
  30 + ptr[2] = z;
  31 + }
  32 +
  33 + //copy constructor
  34 + CUDA_CALLABLE vec3( const vec3<T>& other){
  35 + ptr[0] = other.ptr[0];
  36 + ptr[1] = other.ptr[1];
  37 + ptr[2] = other.ptr[2];
  38 + }
  39 +
  40 + //access an element using an index
  41 + CUDA_CALLABLE T& operator[](size_t idx){
  42 + return ptr[idx];
  43 + }
  44 +
  45 + CUDA_CALLABLE T* data(){
  46 + return ptr;
  47 + }
  48 +
  49 +/// Casting operator. Creates a new vector with a new type U.
  50 + template< typename U >
  51 + CUDA_CALLABLE operator vec3<U>(){
  52 + vec3<U> result;
  53 + result.ptr[0] = (U)ptr[0];
  54 + result.ptr[1] = (U)ptr[1];
  55 + result.ptr[2] = (U)ptr[2];
  56 +
  57 + return result;
  58 + }
  59 +
  60 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  61 + CUDA_CALLABLE T len_sq() const{
  62 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  63 + }
  64 +
  65 + /// computes the Euclidean length of the vector
  66 + CUDA_CALLABLE T len() const{
  67 + return sqrt(len_sq());
  68 + }
  69 +
  70 +
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  72 + CUDA_CALLABLE vec3<T> cart2sph() const{
  73 + vec3<T> sph;
  74 + sph.ptr[0] = len();
  75 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  76 + if(sph.ptr[0] == 0)
  77 + sph.ptr[2] = 0;
  78 + else
  79 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  80 + return sph;
  81 + }
  82 +
  83 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  84 + CUDA_CALLABLE vec3<T> sph2cart() const{
  85 + vec3<T> cart;
  86 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  88 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  89 +
  90 + return cart;
  91 + }
  92 +
  93 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  94 + CUDA_CALLABLE vec3<T> norm() const{
  95 + vec3<T> result;
  96 + T l = len(); //compute the vector length
  97 + return (*this) / l;
  98 + }
  99 +
  100 + /// Computes the cross product of a 3-dimensional vector
  101 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  102 +
  103 + vec3<T> result;
  104 +
  105 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  106 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  107 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  108 +
  109 + return result;
  110 + }
  111 +
  112 + /// Compute the Euclidean inner (dot) product
  113 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  114 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  115 + }
  116 +
  117 + /// Arithmetic addition operator
  118 +
  119 + /// @param rhs is the right-hand-side operator for the addition
  120 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  121 + vec3<T> result;
  122 + result.ptr[0] = ptr[0] + rhs[0];
  123 + result.ptr[1] = ptr[1] + rhs[1];
  124 + result.ptr[2] = ptr[2] + rhs[2];
  125 + return result;
  126 + }
  127 +
  128 + /// Arithmetic addition to a scalar
  129 +
  130 + /// @param rhs is the right-hand-side operator for the addition
  131 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  132 + vec3<T> result;
  133 + result.ptr[0] = ptr[0] + rhs;
  134 + result.ptr[1] = ptr[1] + rhs;
  135 + result.ptr[2] = ptr[2] + rhs;
  136 + return result;
  137 + }
  138 +
  139 + /// Arithmetic subtraction operator
  140 +
  141 + /// @param rhs is the right-hand-side operator for the subtraction
  142 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  143 + vec3<T> result;
  144 + result.ptr[0] = ptr[0] - rhs[0];
  145 + result.ptr[1] = ptr[1] - rhs[1];
  146 + result.ptr[2] = ptr[2] - rhs[2];
  147 + return result;
  148 + }
  149 + /// Arithmetic subtraction to a scalar
  150 +
  151 + /// @param rhs is the right-hand-side operator for the addition
  152 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  153 + vec3<T> result;
  154 + result.ptr[0] = ptr[0] - rhs;
  155 + result.ptr[1] = ptr[1] - rhs;
  156 + result.ptr[2] = ptr[2] - rhs;
  157 + return result;
  158 + }
  159 +
  160 + /// Arithmetic scalar multiplication operator
  161 +
  162 + /// @param rhs is the right-hand-side operator for the subtraction
  163 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  164 + vec3<T> result;
  165 + result.ptr[0] = ptr[0] * rhs;
  166 + result.ptr[1] = ptr[1] * rhs;
  167 + result.ptr[2] = ptr[2] * rhs;
  168 + return result;
  169 + }
  170 +
  171 + /// Arithmetic scalar division operator
  172 +
  173 + /// @param rhs is the right-hand-side operator for the subtraction
  174 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  175 + return (*this) * ((T)1.0/rhs);
  176 + }
  177 +
  178 + /// Multiplication by a scalar, followed by assignment
  179 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  180 + ptr[0] = ptr[0] * rhs;
  181 + ptr[1] = ptr[1] * rhs;
  182 + ptr[2] = ptr[2] * rhs;
  183 + return *this;
  184 + }
  185 +
  186 + /// Addition and assignment
  187 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  188 + ptr[0] = ptr[0] + rhs;
  189 + ptr[1] = ptr[1] + rhs;
  190 + ptr[2] = ptr[2] + rhs;
  191 + return *this;
  192 + }
  193 +
  194 + /// Assign a scalar to all values
  195 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  196 + ptr[0] = ptr[0] = rhs;
  197 + ptr[1] = ptr[1] = rhs;
  198 + ptr[2] = ptr[2] = rhs;
  199 + return *this;
  200 + }
  201 +
  202 + /// Casting and assignment
  203 + template<typename Y>
  204 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  205 + ptr[0] = (T)rhs.ptr[0];
  206 + ptr[1] = (T)rhs.ptr[1];
  207 + ptr[2] = (T)rhs.ptr[2];
  208 + return *this;
  209 + }
  210 +
  211 + /// Unary minus (returns the negative of the vector)
  212 + CUDA_CALLABLE vec3<T> operator-() const{
  213 + vec3<T> result;
  214 + result.ptr[0] = -ptr[0];
  215 + result.ptr[1] = -ptr[1];
  216 + result.ptr[2] = -ptr[2];
  217 + return result;
  218 + }
  219 +
  220 +<<<<<<< HEAD
  221 +//#ifndef __NVCC__
  222 +=======
  223 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  224 + /// Outputs the vector as a string
  225 + std::string str() const{
  226 + std::stringstream ss;
  227 +
  228 + const size_t N = 3;
  229 +
  230 + ss<<"[";
  231 + for(size_t i=0; i<N; i++)
  232 + {
  233 + ss<<ptr[i];
  234 + if(i != N-1)
  235 + ss<<", ";
  236 + }
  237 + ss<<"]";
  238 +
  239 + return ss.str();
  240 + }
  241 +<<<<<<< HEAD
  242 +//#endif
  243 +=======
  244 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  245 +
  246 + size_t size(){ return 3; }
  247 +
  248 + }; //end class vec3
  249 +} //end namespace stim
  250 +
  251 +/// Multiply a vector by a constant when the vector is on the right hand side
  252 +template <typename T>
  253 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  254 + return rhs * lhs;
  255 +}
  256 +
  257 +//stream operator
  258 +template<typename T>
  259 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  260 + os<<rhs.str();
  261 + return os;
  262 +}
  263 +
  264 +#endif
... ...
stim/math/vec3_BACKUP_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +#include <cmath>
  7 +
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// A class designed to act as a 3D vector with CUDA compatibility
  13 +template<typename T>
  14 +class vec3{
  15 +
  16 +protected:
  17 + T ptr[3];
  18 +
  19 +public:
  20 +
  21 + CUDA_CALLABLE vec3(){}
  22 +
  23 + CUDA_CALLABLE vec3(T v){
  24 + ptr[0] = ptr[1] = ptr[2] = v;
  25 + }
  26 +
  27 + CUDA_CALLABLE vec3(T x, T y, T z){
  28 + ptr[0] = x;
  29 + ptr[1] = y;
  30 + ptr[2] = z;
  31 + }
  32 +
  33 + //copy constructor
  34 + CUDA_CALLABLE vec3( const vec3<T>& other){
  35 + ptr[0] = other.ptr[0];
  36 + ptr[1] = other.ptr[1];
  37 + ptr[2] = other.ptr[2];
  38 + }
  39 +
  40 + //access an element using an index
  41 + CUDA_CALLABLE T& operator[](size_t idx){
  42 + return ptr[idx];
  43 + }
  44 +
  45 + CUDA_CALLABLE T* data(){
  46 + return ptr;
  47 + }
  48 +
  49 +/// Casting operator. Creates a new vector with a new type U.
  50 + template< typename U >
  51 + CUDA_CALLABLE operator vec3<U>(){
  52 + vec3<U> result;
  53 + result.ptr[0] = (U)ptr[0];
  54 + result.ptr[1] = (U)ptr[1];
  55 + result.ptr[2] = (U)ptr[2];
  56 +
  57 + return result;
  58 + }
  59 +
  60 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  61 + CUDA_CALLABLE T len_sq() const{
  62 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  63 + }
  64 +
  65 + /// computes the Euclidean length of the vector
  66 + CUDA_CALLABLE T len() const{
  67 + return sqrt(len_sq());
  68 + }
  69 +
  70 +
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  72 + CUDA_CALLABLE vec3<T> cart2sph() const{
  73 + vec3<T> sph;
  74 + sph.ptr[0] = len();
  75 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  76 + if(sph.ptr[0] == 0)
  77 + sph.ptr[2] = 0;
  78 + else
  79 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  80 + return sph;
  81 + }
  82 +
  83 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  84 + CUDA_CALLABLE vec3<T> sph2cart() const{
  85 + vec3<T> cart;
  86 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  88 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  89 +
  90 + return cart;
  91 + }
  92 +
  93 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  94 + CUDA_CALLABLE vec3<T> norm() const{
  95 + vec3<T> result;
  96 + T l = len(); //compute the vector length
  97 + return (*this) / l;
  98 + }
  99 +
  100 + /// Computes the cross product of a 3-dimensional vector
  101 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  102 +
  103 + vec3<T> result;
  104 +
  105 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  106 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  107 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  108 +
  109 + return result;
  110 + }
  111 +
  112 + /// Compute the Euclidean inner (dot) product
  113 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  114 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  115 + }
  116 +
  117 + /// Arithmetic addition operator
  118 +
  119 + /// @param rhs is the right-hand-side operator for the addition
  120 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  121 + vec3<T> result;
  122 + result.ptr[0] = ptr[0] + rhs[0];
  123 + result.ptr[1] = ptr[1] + rhs[1];
  124 + result.ptr[2] = ptr[2] + rhs[2];
  125 + return result;
  126 + }
  127 +
  128 + /// Arithmetic addition to a scalar
  129 +
  130 + /// @param rhs is the right-hand-side operator for the addition
  131 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  132 + vec3<T> result;
  133 + result.ptr[0] = ptr[0] + rhs;
  134 + result.ptr[1] = ptr[1] + rhs;
  135 + result.ptr[2] = ptr[2] + rhs;
  136 + return result;
  137 + }
  138 +
  139 + /// Arithmetic subtraction operator
  140 +
  141 + /// @param rhs is the right-hand-side operator for the subtraction
  142 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  143 + vec3<T> result;
  144 + result.ptr[0] = ptr[0] - rhs[0];
  145 + result.ptr[1] = ptr[1] - rhs[1];
  146 + result.ptr[2] = ptr[2] - rhs[2];
  147 + return result;
  148 + }
  149 + /// Arithmetic subtraction to a scalar
  150 +
  151 + /// @param rhs is the right-hand-side operator for the addition
  152 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  153 + vec3<T> result;
  154 + result.ptr[0] = ptr[0] - rhs;
  155 + result.ptr[1] = ptr[1] - rhs;
  156 + result.ptr[2] = ptr[2] - rhs;
  157 + return result;
  158 + }
  159 +
  160 + /// Arithmetic scalar multiplication operator
  161 +
  162 + /// @param rhs is the right-hand-side operator for the subtraction
  163 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  164 + vec3<T> result;
  165 + result.ptr[0] = ptr[0] * rhs;
  166 + result.ptr[1] = ptr[1] * rhs;
  167 + result.ptr[2] = ptr[2] * rhs;
  168 + return result;
  169 + }
  170 +
  171 + /// Arithmetic scalar division operator
  172 +
  173 + /// @param rhs is the right-hand-side operator for the subtraction
  174 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  175 + return (*this) * ((T)1.0/rhs);
  176 + }
  177 +
  178 + /// Multiplication by a scalar, followed by assignment
  179 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  180 + ptr[0] = ptr[0] * rhs;
  181 + ptr[1] = ptr[1] * rhs;
  182 + ptr[2] = ptr[2] * rhs;
  183 + return *this;
  184 + }
  185 +
  186 + /// Addition and assignment
  187 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  188 + ptr[0] = ptr[0] + rhs;
  189 + ptr[1] = ptr[1] + rhs;
  190 + ptr[2] = ptr[2] + rhs;
  191 + return *this;
  192 + }
  193 +
  194 + /// Assign a scalar to all values
  195 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  196 + ptr[0] = ptr[0] = rhs;
  197 + ptr[1] = ptr[1] = rhs;
  198 + ptr[2] = ptr[2] = rhs;
  199 + return *this;
  200 + }
  201 +
  202 + /// Casting and assignment
  203 + template<typename Y>
  204 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  205 + ptr[0] = (T)rhs.ptr[0];
  206 + ptr[1] = (T)rhs.ptr[1];
  207 + ptr[2] = (T)rhs.ptr[2];
  208 + return *this;
  209 + }
  210 +
  211 + /// Unary minus (returns the negative of the vector)
  212 + CUDA_CALLABLE vec3<T> operator-() const{
  213 + vec3<T> result;
  214 + result.ptr[0] = -ptr[0];
  215 + result.ptr[1] = -ptr[1];
  216 + result.ptr[2] = -ptr[2];
  217 + return result;
  218 + }
  219 +
  220 +<<<<<<< HEAD
  221 +//#ifndef __NVCC__
  222 +=======
  223 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  224 + /// Outputs the vector as a string
  225 + std::string str() const{
  226 + std::stringstream ss;
  227 +
  228 + const size_t N = 3;
  229 +
  230 + ss<<"[";
  231 + for(size_t i=0; i<N; i++)
  232 + {
  233 + ss<<ptr[i];
  234 + if(i != N-1)
  235 + ss<<", ";
  236 + }
  237 + ss<<"]";
  238 +
  239 + return ss.str();
  240 + }
  241 +<<<<<<< HEAD
  242 +//#endif
  243 +=======
  244 +>>>>>>> 9f5c0d4a055a2a19e69a97db1441aa617f96180c
  245 +
  246 + size_t size(){ return 3; }
  247 +
  248 + }; //end class vec3
  249 +} //end namespace stim
  250 +
  251 +/// Multiply a vector by a constant when the vector is on the right hand side
  252 +template <typename T>
  253 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  254 + return rhs * lhs;
  255 +}
  256 +
  257 +//stream operator
  258 +template<typename T>
  259 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  260 + os<<rhs.str();
  261 + return os;
  262 +}
  263 +
  264 +#endif
... ...
stim/math/vec3_BASE_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](size_t idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 + CUDA_CALLABLE T* data(){
  45 + return ptr;
  46 + }
  47 +
  48 +/// Casting operator. Creates a new vector with a new type U.
  49 + template< typename U >
  50 + CUDA_CALLABLE operator vec3<U>(){
  51 + vec3<U> result;
  52 + result.ptr[0] = (U)ptr[0];
  53 + result.ptr[1] = (U)ptr[1];
  54 + result.ptr[2] = (U)ptr[2];
  55 +
  56 + return result;
  57 + }
  58 +
  59 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  60 + CUDA_CALLABLE T len_sq() const{
  61 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  62 + }
  63 +
  64 + /// computes the Euclidean length of the vector
  65 + CUDA_CALLABLE T len() const{
  66 + return sqrt(len_sq());
  67 + }
  68 +
  69 +
  70 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  71 + CUDA_CALLABLE vec3<T> cart2sph() const{
  72 + vec3<T> sph;
  73 + sph.ptr[0] = len();
  74 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  75 + if(sph.ptr[0] == 0)
  76 + sph.ptr[2] = 0;
  77 + else
  78 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  79 + return sph;
  80 + }
  81 +
  82 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  83 + CUDA_CALLABLE vec3<T> sph2cart() const{
  84 + vec3<T> cart;
  85 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  86 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  88 +
  89 + return cart;
  90 + }
  91 +
  92 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  93 + CUDA_CALLABLE vec3<T> norm() const{
  94 + vec3<T> result;
  95 + T l = len(); //compute the vector length
  96 + return (*this) / l;
  97 + }
  98 +
  99 + /// Computes the cross product of a 3-dimensional vector
  100 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  101 +
  102 + vec3<T> result;
  103 +
  104 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  105 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  106 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  107 +
  108 + return result;
  109 + }
  110 +
  111 + /// Compute the Euclidean inner (dot) product
  112 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  113 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  114 + }
  115 +
  116 + /// Arithmetic addition operator
  117 +
  118 + /// @param rhs is the right-hand-side operator for the addition
  119 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  120 + vec3<T> result;
  121 + result.ptr[0] = ptr[0] + rhs[0];
  122 + result.ptr[1] = ptr[1] + rhs[1];
  123 + result.ptr[2] = ptr[2] + rhs[2];
  124 + return result;
  125 + }
  126 +
  127 + /// Arithmetic addition to a scalar
  128 +
  129 + /// @param rhs is the right-hand-side operator for the addition
  130 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  131 + vec3<T> result;
  132 + result.ptr[0] = ptr[0] + rhs;
  133 + result.ptr[1] = ptr[1] + rhs;
  134 + result.ptr[2] = ptr[2] + rhs;
  135 + return result;
  136 + }
  137 +
  138 + /// Arithmetic subtraction operator
  139 +
  140 + /// @param rhs is the right-hand-side operator for the subtraction
  141 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  142 + vec3<T> result;
  143 + result.ptr[0] = ptr[0] - rhs[0];
  144 + result.ptr[1] = ptr[1] - rhs[1];
  145 + result.ptr[2] = ptr[2] - rhs[2];
  146 + return result;
  147 + }
  148 + /// Arithmetic subtraction to a scalar
  149 +
  150 + /// @param rhs is the right-hand-side operator for the addition
  151 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  152 + vec3<T> result;
  153 + result.ptr[0] = ptr[0] - rhs;
  154 + result.ptr[1] = ptr[1] - rhs;
  155 + result.ptr[2] = ptr[2] - rhs;
  156 + return result;
  157 + }
  158 +
  159 + /// Arithmetic scalar multiplication operator
  160 +
  161 + /// @param rhs is the right-hand-side operator for the subtraction
  162 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  163 + vec3<T> result;
  164 + result.ptr[0] = ptr[0] * rhs;
  165 + result.ptr[1] = ptr[1] * rhs;
  166 + result.ptr[2] = ptr[2] * rhs;
  167 + return result;
  168 + }
  169 +
  170 + /// Arithmetic scalar division operator
  171 +
  172 + /// @param rhs is the right-hand-side operator for the subtraction
  173 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  174 + return (*this) * ((T)1.0/rhs);
  175 + }
  176 +
  177 + /// Multiplication by a scalar, followed by assignment
  178 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  179 + ptr[0] = ptr[0] * rhs;
  180 + ptr[1] = ptr[1] * rhs;
  181 + ptr[2] = ptr[2] * rhs;
  182 + return *this;
  183 + }
  184 +
  185 + /// Addition and assignment
  186 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  187 + ptr[0] = ptr[0] + rhs;
  188 + ptr[1] = ptr[1] + rhs;
  189 + ptr[2] = ptr[2] + rhs;
  190 + return *this;
  191 + }
  192 +
  193 + /// Assign a scalar to all values
  194 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  195 + ptr[0] = ptr[0] = rhs;
  196 + ptr[1] = ptr[1] = rhs;
  197 + ptr[2] = ptr[2] = rhs;
  198 + return *this;
  199 + }
  200 +
  201 + /// Casting and assignment
  202 + template<typename Y>
  203 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  204 + ptr[0] = (T)rhs.ptr[0];
  205 + ptr[1] = (T)rhs.ptr[1];
  206 + ptr[2] = (T)rhs.ptr[2];
  207 + return *this;
  208 + }
  209 +
  210 + /// Unary minus (returns the negative of the vector)
  211 + CUDA_CALLABLE vec3<T> operator-() const{
  212 + vec3<T> result;
  213 + result.ptr[0] = -ptr[0];
  214 + result.ptr[1] = -ptr[1];
  215 + result.ptr[2] = -ptr[2];
  216 + return result;
  217 + }
  218 +
  219 +#ifndef __NVCC__
  220 + /// Outputs the vector as a string
  221 + std::string str() const{
  222 + std::stringstream ss;
  223 +
  224 + const size_t N = 3;
  225 +
  226 + ss<<"[";
  227 + for(size_t i=0; i<N; i++)
  228 + {
  229 + ss<<ptr[i];
  230 + if(i != N-1)
  231 + ss<<", ";
  232 + }
  233 + ss<<"]";
  234 +
  235 + return ss.str();
  236 + }
  237 +#endif
  238 +
  239 + size_t size(){ return 3; }
  240 +
  241 + }; //end class vec3
  242 +} //end namespace stim
  243 +
  244 +/// Multiply a vector by a constant when the vector is on the right hand side
  245 +template <typename T>
  246 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  247 + return rhs * lhs;
  248 +}
  249 +
  250 +//stream operator
  251 +template<typename T>
  252 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  253 + os<<rhs.str();
  254 + return os;
  255 +}
  256 +
  257 +#endif
... ...
stim/math/vec3_LOCAL_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +
  8 +namespace stim{
  9 +
  10 +
  11 +/// A class designed to act as a 3D vector with CUDA compatibility
  12 +template<typename T>
  13 +class vec3{
  14 +
  15 +protected:
  16 + T ptr[3];
  17 +
  18 +public:
  19 +
  20 + CUDA_CALLABLE vec3(){}
  21 +
  22 + CUDA_CALLABLE vec3(T v){
  23 + ptr[0] = ptr[1] = ptr[2] = v;
  24 + }
  25 +
  26 + CUDA_CALLABLE vec3(T x, T y, T z){
  27 + ptr[0] = x;
  28 + ptr[1] = y;
  29 + ptr[2] = z;
  30 + }
  31 +
  32 + //copy constructor
  33 + CUDA_CALLABLE vec3( const vec3<T>& other){
  34 + ptr[0] = other.ptr[0];
  35 + ptr[1] = other.ptr[1];
  36 + ptr[2] = other.ptr[2];
  37 + }
  38 +
  39 + //access an element using an index
  40 + CUDA_CALLABLE T& operator[](size_t idx){
  41 + return ptr[idx];
  42 + }
  43 +
  44 + CUDA_CALLABLE T* data(){
  45 + return ptr;
  46 + }
  47 +
  48 +/// Casting operator. Creates a new vector with a new type U.
  49 + template< typename U >
  50 + CUDA_CALLABLE operator vec3<U>(){
  51 + vec3<U> result;
  52 + result.ptr[0] = (U)ptr[0];
  53 + result.ptr[1] = (U)ptr[1];
  54 + result.ptr[2] = (U)ptr[2];
  55 +
  56 + return result;
  57 + }
  58 +
  59 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  60 + CUDA_CALLABLE T len_sq() const{
  61 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  62 + }
  63 +
  64 + /// computes the Euclidean length of the vector
  65 + CUDA_CALLABLE T len() const{
  66 + return sqrt(len_sq());
  67 + }
  68 +
  69 +
  70 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  71 + CUDA_CALLABLE vec3<T> cart2sph() const{
  72 + vec3<T> sph;
  73 + sph.ptr[0] = len();
  74 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  75 + if(sph.ptr[0] == 0)
  76 + sph.ptr[2] = 0;
  77 + else
  78 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  79 + return sph;
  80 + }
  81 +
  82 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  83 + CUDA_CALLABLE vec3<T> sph2cart() const{
  84 + vec3<T> cart;
  85 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  86 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  88 +
  89 + return cart;
  90 + }
  91 +
  92 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  93 + CUDA_CALLABLE vec3<T> norm() const{
  94 + vec3<T> result;
  95 + T l = len(); //compute the vector length
  96 + return (*this) / l;
  97 + }
  98 +
  99 + /// Computes the cross product of a 3-dimensional vector
  100 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  101 +
  102 + vec3<T> result;
  103 +
  104 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  105 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  106 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  107 +
  108 + return result;
  109 + }
  110 +
  111 + /// Compute the Euclidean inner (dot) product
  112 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  113 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  114 + }
  115 +
  116 + /// Arithmetic addition operator
  117 +
  118 + /// @param rhs is the right-hand-side operator for the addition
  119 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  120 + vec3<T> result;
  121 + result.ptr[0] = ptr[0] + rhs[0];
  122 + result.ptr[1] = ptr[1] + rhs[1];
  123 + result.ptr[2] = ptr[2] + rhs[2];
  124 + return result;
  125 + }
  126 +
  127 + /// Arithmetic addition to a scalar
  128 +
  129 + /// @param rhs is the right-hand-side operator for the addition
  130 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  131 + vec3<T> result;
  132 + result.ptr[0] = ptr[0] + rhs;
  133 + result.ptr[1] = ptr[1] + rhs;
  134 + result.ptr[2] = ptr[2] + rhs;
  135 + return result;
  136 + }
  137 +
  138 + /// Arithmetic subtraction operator
  139 +
  140 + /// @param rhs is the right-hand-side operator for the subtraction
  141 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  142 + vec3<T> result;
  143 + result.ptr[0] = ptr[0] - rhs[0];
  144 + result.ptr[1] = ptr[1] - rhs[1];
  145 + result.ptr[2] = ptr[2] - rhs[2];
  146 + return result;
  147 + }
  148 + /// Arithmetic subtraction to a scalar
  149 +
  150 + /// @param rhs is the right-hand-side operator for the addition
  151 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  152 + vec3<T> result;
  153 + result.ptr[0] = ptr[0] - rhs;
  154 + result.ptr[1] = ptr[1] - rhs;
  155 + result.ptr[2] = ptr[2] - rhs;
  156 + return result;
  157 + }
  158 +
  159 + /// Arithmetic scalar multiplication operator
  160 +
  161 + /// @param rhs is the right-hand-side operator for the subtraction
  162 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  163 + vec3<T> result;
  164 + result.ptr[0] = ptr[0] * rhs;
  165 + result.ptr[1] = ptr[1] * rhs;
  166 + result.ptr[2] = ptr[2] * rhs;
  167 + return result;
  168 + }
  169 +
  170 + /// Arithmetic scalar division operator
  171 +
  172 + /// @param rhs is the right-hand-side operator for the subtraction
  173 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  174 + return (*this) * ((T)1.0/rhs);
  175 + }
  176 +
  177 + /// Multiplication by a scalar, followed by assignment
  178 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  179 + ptr[0] = ptr[0] * rhs;
  180 + ptr[1] = ptr[1] * rhs;
  181 + ptr[2] = ptr[2] * rhs;
  182 + return *this;
  183 + }
  184 +
  185 + /// Addition and assignment
  186 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  187 + ptr[0] = ptr[0] + rhs;
  188 + ptr[1] = ptr[1] + rhs;
  189 + ptr[2] = ptr[2] + rhs;
  190 + return *this;
  191 + }
  192 +
  193 + /// Assign a scalar to all values
  194 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  195 + ptr[0] = ptr[0] = rhs;
  196 + ptr[1] = ptr[1] = rhs;
  197 + ptr[2] = ptr[2] = rhs;
  198 + return *this;
  199 + }
  200 +
  201 + /// Casting and assignment
  202 + template<typename Y>
  203 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  204 + ptr[0] = (T)rhs.ptr[0];
  205 + ptr[1] = (T)rhs.ptr[1];
  206 + ptr[2] = (T)rhs.ptr[2];
  207 + return *this;
  208 + }
  209 +
  210 + /// Unary minus (returns the negative of the vector)
  211 + CUDA_CALLABLE vec3<T> operator-() const{
  212 + vec3<T> result;
  213 + result.ptr[0] = -ptr[0];
  214 + result.ptr[1] = -ptr[1];
  215 + result.ptr[2] = -ptr[2];
  216 + return result;
  217 + }
  218 +
  219 +//#ifndef __NVCC__
  220 + /// Outputs the vector as a string
  221 + std::string str() const{
  222 + std::stringstream ss;
  223 +
  224 + const size_t N = 3;
  225 +
  226 + ss<<"[";
  227 + for(size_t i=0; i<N; i++)
  228 + {
  229 + ss<<ptr[i];
  230 + if(i != N-1)
  231 + ss<<", ";
  232 + }
  233 + ss<<"]";
  234 +
  235 + return ss.str();
  236 + }
  237 +//#endif
  238 +
  239 + size_t size(){ return 3; }
  240 +
  241 + }; //end class vec3
  242 +} //end namespace stim
  243 +
  244 +/// Multiply a vector by a constant when the vector is on the right hand side
  245 +template <typename T>
  246 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  247 + return rhs * lhs;
  248 +}
  249 +
  250 +//stream operator
  251 +template<typename T>
  252 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  253 + os<<rhs.str();
  254 + return os;
  255 +}
  256 +
  257 +#endif
... ...
stim/math/vec3_REMOTE_62876.h 0 → 100644
  1 +#ifndef STIM_VEC3_H
  2 +#define STIM_VEC3_H
  3 +
  4 +
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +#include <cmath>
  7 +
  8 +
  9 +namespace stim{
  10 +
  11 +
  12 +/// A class designed to act as a 3D vector with CUDA compatibility
  13 +template<typename T>
  14 +class vec3{
  15 +
  16 +protected:
  17 + T ptr[3];
  18 +
  19 +public:
  20 +
  21 + CUDA_CALLABLE vec3(){}
  22 +
  23 + CUDA_CALLABLE vec3(T v){
  24 + ptr[0] = ptr[1] = ptr[2] = v;
  25 + }
  26 +
  27 + CUDA_CALLABLE vec3(T x, T y, T z){
  28 + ptr[0] = x;
  29 + ptr[1] = y;
  30 + ptr[2] = z;
  31 + }
  32 +
  33 + //copy constructor
  34 + CUDA_CALLABLE vec3( const vec3<T>& other){
  35 + ptr[0] = other.ptr[0];
  36 + ptr[1] = other.ptr[1];
  37 + ptr[2] = other.ptr[2];
  38 + }
  39 +
  40 + //access an element using an index
  41 + CUDA_CALLABLE T& operator[](size_t idx){
  42 + return ptr[idx];
  43 + }
  44 +
  45 + CUDA_CALLABLE T* data(){
  46 + return ptr;
  47 + }
  48 +
  49 +/// Casting operator. Creates a new vector with a new type U.
  50 + template< typename U >
  51 + CUDA_CALLABLE operator vec3<U>(){
  52 + vec3<U> result;
  53 + result.ptr[0] = (U)ptr[0];
  54 + result.ptr[1] = (U)ptr[1];
  55 + result.ptr[2] = (U)ptr[2];
  56 +
  57 + return result;
  58 + }
  59 +
  60 + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
  61 + CUDA_CALLABLE T len_sq() const{
  62 + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
  63 + }
  64 +
  65 + /// computes the Euclidean length of the vector
  66 + CUDA_CALLABLE T len() const{
  67 + return sqrt(len_sq());
  68 + }
  69 +
  70 +
  71 + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
  72 + CUDA_CALLABLE vec3<T> cart2sph() const{
  73 + vec3<T> sph;
  74 + sph.ptr[0] = len();
  75 + sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
  76 + if(sph.ptr[0] == 0)
  77 + sph.ptr[2] = 0;
  78 + else
  79 + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
  80 + return sph;
  81 + }
  82 +
  83 + /// Convert the vector from cartesian to spherical coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi])
  84 + CUDA_CALLABLE vec3<T> sph2cart() const{
  85 + vec3<T> cart;
  86 + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
  87 + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
  88 + cart.ptr[2] = ptr[0] * std::cos(ptr[2]);
  89 +
  90 + return cart;
  91 + }
  92 +
  93 + /// Computes the normalized vector (where each coordinate is divided by the L2 norm)
  94 + CUDA_CALLABLE vec3<T> norm() const{
  95 + vec3<T> result;
  96 + T l = len(); //compute the vector length
  97 + return (*this) / l;
  98 + }
  99 +
  100 + /// Computes the cross product of a 3-dimensional vector
  101 + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{
  102 +
  103 + vec3<T> result;
  104 +
  105 + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
  106 + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
  107 + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
  108 +
  109 + return result;
  110 + }
  111 +
  112 + /// Compute the Euclidean inner (dot) product
  113 + CUDA_CALLABLE T dot(vec3<T> rhs) const{
  114 + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
  115 + }
  116 +
  117 + /// Arithmetic addition operator
  118 +
  119 + /// @param rhs is the right-hand-side operator for the addition
  120 + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
  121 + vec3<T> result;
  122 + result.ptr[0] = ptr[0] + rhs[0];
  123 + result.ptr[1] = ptr[1] + rhs[1];
  124 + result.ptr[2] = ptr[2] + rhs[2];
  125 + return result;
  126 + }
  127 +
  128 + /// Arithmetic addition to a scalar
  129 +
  130 + /// @param rhs is the right-hand-side operator for the addition
  131 + CUDA_CALLABLE vec3<T> operator+(T rhs) const{
  132 + vec3<T> result;
  133 + result.ptr[0] = ptr[0] + rhs;
  134 + result.ptr[1] = ptr[1] + rhs;
  135 + result.ptr[2] = ptr[2] + rhs;
  136 + return result;
  137 + }
  138 +
  139 + /// Arithmetic subtraction operator
  140 +
  141 + /// @param rhs is the right-hand-side operator for the subtraction
  142 + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
  143 + vec3<T> result;
  144 + result.ptr[0] = ptr[0] - rhs[0];
  145 + result.ptr[1] = ptr[1] - rhs[1];
  146 + result.ptr[2] = ptr[2] - rhs[2];
  147 + return result;
  148 + }
  149 + /// Arithmetic subtraction to a scalar
  150 +
  151 + /// @param rhs is the right-hand-side operator for the addition
  152 + CUDA_CALLABLE vec3<T> operator-(T rhs) const{
  153 + vec3<T> result;
  154 + result.ptr[0] = ptr[0] - rhs;
  155 + result.ptr[1] = ptr[1] - rhs;
  156 + result.ptr[2] = ptr[2] - rhs;
  157 + return result;
  158 + }
  159 +
  160 + /// Arithmetic scalar multiplication operator
  161 +
  162 + /// @param rhs is the right-hand-side operator for the subtraction
  163 + CUDA_CALLABLE vec3<T> operator*(T rhs) const{
  164 + vec3<T> result;
  165 + result.ptr[0] = ptr[0] * rhs;
  166 + result.ptr[1] = ptr[1] * rhs;
  167 + result.ptr[2] = ptr[2] * rhs;
  168 + return result;
  169 + }
  170 +
  171 + /// Arithmetic scalar division operator
  172 +
  173 + /// @param rhs is the right-hand-side operator for the subtraction
  174 + CUDA_CALLABLE vec3<T> operator/(T rhs) const{
  175 + return (*this) * ((T)1.0/rhs);
  176 + }
  177 +
  178 + /// Multiplication by a scalar, followed by assignment
  179 + CUDA_CALLABLE vec3<T> operator*=(T rhs){
  180 + ptr[0] = ptr[0] * rhs;
  181 + ptr[1] = ptr[1] * rhs;
  182 + ptr[2] = ptr[2] * rhs;
  183 + return *this;
  184 + }
  185 +
  186 + /// Addition and assignment
  187 + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
  188 + ptr[0] = ptr[0] + rhs;
  189 + ptr[1] = ptr[1] + rhs;
  190 + ptr[2] = ptr[2] + rhs;
  191 + return *this;
  192 + }
  193 +
  194 + /// Assign a scalar to all values
  195 + CUDA_CALLABLE vec3<T> & operator=(T rhs){
  196 + ptr[0] = ptr[0] = rhs;
  197 + ptr[1] = ptr[1] = rhs;
  198 + ptr[2] = ptr[2] = rhs;
  199 + return *this;
  200 + }
  201 +
  202 + /// Casting and assignment
  203 + template<typename Y>
  204 + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
  205 + ptr[0] = (T)rhs.ptr[0];
  206 + ptr[1] = (T)rhs.ptr[1];
  207 + ptr[2] = (T)rhs.ptr[2];
  208 + return *this;
  209 + }
  210 +
  211 + /// Unary minus (returns the negative of the vector)
  212 + CUDA_CALLABLE vec3<T> operator-() const{
  213 + vec3<T> result;
  214 + result.ptr[0] = -ptr[0];
  215 + result.ptr[1] = -ptr[1];
  216 + result.ptr[2] = -ptr[2];
  217 + return result;
  218 + }
  219 +
  220 + /// Outputs the vector as a string
  221 + std::string str() const{
  222 + std::stringstream ss;
  223 +
  224 + const size_t N = 3;
  225 +
  226 + ss<<"[";
  227 + for(size_t i=0; i<N; i++)
  228 + {
  229 + ss<<ptr[i];
  230 + if(i != N-1)
  231 + ss<<", ";
  232 + }
  233 + ss<<"]";
  234 +
  235 + return ss.str();
  236 + }
  237 +
  238 + size_t size(){ return 3; }
  239 +
  240 + }; //end class vec3
  241 +} //end namespace stim
  242 +
  243 +/// Multiply a vector by a constant when the vector is on the right hand side
  244 +template <typename T>
  245 +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){
  246 + return rhs * lhs;
  247 +}
  248 +
  249 +//stream operator
  250 +template<typename T>
  251 +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){
  252 + os<<rhs.str();
  253 + return os;
  254 +}
  255 +
  256 +#endif
... ...
stim/math/vector.h
... ... @@ -5,6 +5,7 @@
5 5 #include <cmath>
6 6 #include <sstream>
7 7 #include <vector>
  8 +#include <algorithm>
8 9  
9 10 #include <stim/cuda/cudatools/callable.h>
10 11 #include <stim/math/vec3.h>
... ... @@ -74,11 +75,11 @@ struct vec : public std::vector&lt;T&gt;
74 75 at(i) = other[i];
75 76 }
76 77 }
77   -
  78 +
78 79 // vec( vec3<T>& other){
79 80 // resize(3); //resize the current vector to match the copy
80 81 // for(size_t i=0; i<3; i++){ //copy each element
81   -// at(i) = other[i];
  82 +// at(i) = other[i];
82 83 // }
83 84 // }
84 85  
... ... @@ -139,16 +140,16 @@ struct vec : public std::vector&lt;T&gt;
139 140  
140 141 }
141 142  
142   -
143   - vec<T> cyl2cart() const
144   - {
145   - vec<T> cyl;
146   - cyl.push_back(at(0)*std::sin(at(1)));
147   - cyl.push_back(at(0)*std::cos(at(1)));
148   - cyl.push_back(at(2));
149   - return(cyl);
150   -
151   - }
  143 +
  144 + vec<T> cyl2cart() const
  145 + {
  146 + vec<T> cyl;
  147 + cyl.push_back(at(0)*std::sin(at(1)));
  148 + cyl.push_back(at(0)*std::cos(at(1)));
  149 + cyl.push_back(at(2));
  150 + return(cyl);
  151 +
  152 + }
152 153 /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi])
153 154 vec<T> cart2sph() const
154 155 {
... ... @@ -335,16 +336,16 @@ struct vec : public std::vector&lt;T&gt;
335 336 return *this;
336 337 }
337 338  
338   - /// Cast to a vec3
339   - operator stim::vec3<T>(){
340   - stim::vec3<T> r;
341   - size_t N = std::min<size_t>(size(), 3);
342   - for(size_t i = 0; i < N; i++)
343   - r[i] = at(i);
344   - return r;
345   - }
346   -
347   -
  339 + /// Cast to a vec3
  340 + operator stim::vec3<T>(){
  341 + stim::vec3<T> r;
  342 + size_t N = std::min(size(), (size_t)3);
  343 + for(size_t i = 0; i < N; i++)
  344 + r[i] = at(i);
  345 + return r;
  346 + }
  347 +
  348 +
348 349 /// Casting and assignment
349 350 template<typename Y>
350 351 vec<T> & operator=(vec<Y> rhs){
... ... @@ -355,16 +356,16 @@ struct vec : public std::vector&lt;T&gt;
355 356 at(i) = rhs[i];
356 357 return *this;
357 358 }
358   -
359   - /// Assign a vec = vec3
360   - template<typename Y>
361   - vec<T> & operator=(vec3<Y> rhs)
362   - {
363   - resize(3);
364   - for(size_t i=0; i<3; i++)
365   - at(i) = rhs[i];
366   - return *this;
367   - }
  359 +
  360 + /// Assign a vec = vec3
  361 + template<typename Y>
  362 + vec<T> & operator=(vec3<Y> rhs)
  363 + {
  364 + resize(3);
  365 + for(size_t i=0; i<3; i++)
  366 + at(i) = rhs[i];
  367 + return *this;
  368 + }
368 369  
369 370 /// Unary minus (returns the negative of the vector)
370 371 vec<T> operator-() const{
... ...
stim/parser/arguments.h
... ... @@ -13,6 +13,44 @@
13 13 #include <Windows.h>
14 14 #endif
15 15  
  16 +/**The arglist class implements command line arguments.
  17 + Example:
  18 +
  19 + 1) Create an arglist instance:
  20 +
  21 + stim::arglist args;
  22 +
  23 + 2) Add arguments:
  24 +
  25 + args.add("help", "prints this help");
  26 + args.add("foo", "foo takes a single integer value", "", "[intval]");
  27 + args.add("bar", "bar takes two floating point values", "", "[value1], [value2]");
  28 +
  29 + 3) Parse the command line:
  30 +
  31 + args.parse(argc, argv);
  32 +
  33 + 4) You generally want to immediately test for help and output available arguments:
  34 +
  35 + if(args["help"].is_set())
  36 + std::cout<<args.str();
  37 +
  38 +
  39 +
  40 + 5) Retrieve values:
  41 +
  42 + int foo;
  43 + float bar1, bar2;
  44 + if(args["foo"])
  45 + foo = args["foo"].as_int();
  46 + if(args["bar"]){
  47 + bar1 = args["bar"].as_float(0);
  48 + bar2 = args["bar"].as_float(1);
  49 + }
  50 +
  51 +
  52 +**/
  53 +
16 54 namespace stim{
17 55  
18 56 class cmd_option
... ... @@ -258,10 +296,12 @@ namespace stim{
258 296 flag = true;
259 297 }
260 298  
261   - bool is_set()
262   - {
  299 + bool is_set() const{
263 300 return flag;
264 301 }
  302 + operator bool() const{
  303 + return is_set();
  304 + }
265 305  
266 306 };
267 307  
... ... @@ -271,43 +311,7 @@ namespace stim{
271 311 size_t index;
272 312 };
273 313  
274   - /**The arglist class implements command line arguments.
275   - Example:
276   -
277   - 1) Create an arglist instance:
278   -
279   - stim::arglist args;
280   -
281   - 2) Add arguments:
282 314  
283   - args.add("help", "prints this help");
284   - args.add("foo", "foo takes a single integer value", "", "[intval]");
285   - args.add("bar", "bar takes two floating point values", "", "[value1], [value2]");
286   -
287   - 3) Parse the command line:
288   -
289   - args.parse(argc, argv);
290   -
291   - 4) You generally want to immediately test for help and output available arguments:
292   -
293   - if(args["help"].is_set())
294   - std::cout<<args.str();
295   -
296   -
297   -
298   - 5) Retrieve values:
299   -
300   - int foo;
301   - float bar1, bar2;
302   - if(args["foo"])
303   - foo = args["foo"].as_int();
304   - if(args["bar"]){
305   - bar1 = args["bar"].as_float(0);
306   - bar2 = args["bar"].as_float(1);
307   - }
308   -
309   -
310   - **/
311 315  
312 316 class arglist
313 317 {
... ... @@ -528,21 +532,21 @@ namespace stim{
528 532 std::vector<std::string> arg_vector(){
529 533 return args;
530 534 }
531   - ///Returns an object describing the argument
532   -
533   - /// @param _name is the name of the requested argument
534   - cmd_option operator[](std::string _name){
535   - std::vector<cmd_option>::iterator it;
536   - it = find(opts.begin(), opts.end(), _name);// - opts.begin();
  535 + ///Returns an object describing the argument
537 536  
538   - if(it == opts.end()){
539   - std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl;
540   - exit(1);
541   - }
  537 + /// @param _name is the name of the requested argument
  538 + cmd_option operator[](std::string _name){
  539 + std::vector<cmd_option>::iterator it;
  540 + it = find(opts.begin(), opts.end(), _name);// - opts.begin();
542 541  
543   - return *it;
  542 + if(it == opts.end()){
  543 + std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl;
  544 + exit(1);
544 545 }
545 546  
  547 + return *it;
  548 + }
  549 +
546 550  
547 551 };
548 552  
... ...
stim/sampling/func1_from_symmetric2.h 0 → 100644
  1 +/// Reconstruct a 1D function from a 2D symmetric function. This function takes a 2D image f(x,y) as input and
  2 +/// builds a 1D function f(r) where r = sqrt(x^2 + y^2) to approximate this 2D function.
  3 +/// This is useful for several applications, such as:
  4 +/// 1) Calculating a 1D function from a noisy 2D image, when you know the 2D image is supposed to be symmetric
  5 +/// 2) Calculating the average value for every r = sqrt(x^2 + y^2)
  6 +
/// Given a set of function samples equally spaced by dx, calculate the sample at or immediately below x and the
///		proximity ratio alpha. This can be used to linearly interpolate between an array of equally spaced values.
///		Given the query value x, the interpolated value can be calculated as
///		r = values[sample] * alpha + values[sample + 1] * (1 - alpha)
/// @param sample receives the index of the sample at or immediately below the query point x (stored as a T)
/// @param alpha receives the weight of values[sample]; it is 1 when x lies exactly on that sample
/// @param dx is the spacing between values
/// @param x is the query point
template<typename T>
void lerp_alpha(T& sample, T& alpha, T dx, T x){
	sample = std::floor(x/dx);					//index of the sample at or below x
	alpha = 1 - (x - (sample * dx)) / dx;		//fraction of the way back from the next sample (was using an undefined variable "b")
}
  19 +
/// Reconstruct a 1D function f(r) from a symmetric 2D image f(x, y) on the CPU.
/// This function assumes that the input image is square, that the # of samples is odd, and that r=0 is at the center
/// @param fr is an array of X elements that will store the reconstructed function
/// @param dr receives the spacing (in pixels) between samples in fr
/// @param fxy is the X * X input image, read as fxy[y * X + x]
/// @param X is the number of samples along each side of the image (must be odd)
template<typename T>
void cpu_func1_from_symmetric2(T* fr, T& dr, T* fxy, size_t X){

	if(X%2 == 0){												//the 2D function must be odd (a sample must be available for r=0)
		std::cerr<<"Error, X = "<<X<<" must be odd."<<std::endl;
		exit(1);
	}
	if(X == 1){													//a single pixel is already f(0); avoids dividing by X - 1 = 0 below
		fr[0] = fxy[0];
		dr = 0;
		return;
	}

	size_t c = X/2;												//index of the center pixel (r = 0)
	size_t C = c + 1;											//number of samples along each side of the folded function
	size_t N = C * C;											//number of values in the folded function

	// The first step is to fold the function 8 times to take advantage of symmetry in the grid
	T* folded = (T*) malloc(sizeof(T) * N);						//sum (then mean) of all pixels that fold onto each cell
	size_t* count = (size_t*) malloc(sizeof(size_t) * N);		//number of pixels that fold onto each cell
	T* fA = (T*) malloc(sizeof(T) * X);							//sum of the alpha weights placed in each output bin
	if(folded == NULL || count == NULL || fA == NULL){
		std::cerr<<"Error, unable to allocate memory for the folded function."<<std::endl;
		free(folded);
		free(count);
		free(fA);
		exit(1);
	}
	for(size_t i = 0; i < N; i++){								//zero the folded function and its counter
		folded[i] = 0;
		count[i] = 0;
	}

	for(size_t yi = 0; yi < X; yi++){
		for(size_t xi = 0; xi < X; xi++){
			//fold along the x and y axes: the folded index is the distance from the center pixel
			size_t xii = (xi > c) ? xi - c : c - xi;
			size_t yii = (yi > c) ? yi - c : c - yi;

			if(xii < yii) std::swap(xii, yii);					//fold again along the 45-degree line, so xii >= yii

			folded[yii * C + xii] += fxy[yi * X + xi];			//add the value to the folded function
			count[yii * C + xii] += 1;							//add a counter to the counter table
		}
	}

	//divide out the counter to correct the folded function (cells with xii < yii are never written)
	for(size_t i = 0; i < N; i++){
		if(count[i] != 0)
			folded[i] /= (T)count[i];
	}

	T side = (T)X;
	T max_r = std::sqrt(side * side + side * side);				//length of the image diagonal (the image is square)
	dr = max_r / (T)(X - 1);									//spacing between samples in the output function f(r)
	// NOTE(review): r is measured from the center, so it never exceeds half of this diagonal and the
	//				upper bins of fr receive no samples (they are left at zero) - confirm this is the intended dr

	for(size_t i = 0; i < X; i++){								//zero out the output function and the alpha array
		fA[i] = 0;
		fr[i] = 0;
	}

	for(size_t xi = 0; xi < C; xi++){
		for(size_t yi = 0; yi <= xi; yi++){						//include the diagonal and the center pixel (0, 0)
			T r = std::sqrt((T)(xi * xi + yi * yi));			//calculate the value of r for the current (x, y)
			T bin, alpha;
			lerp_alpha(bin, alpha, dr, r);						//calculate the lowest nearby sample index and the associated alpha weight
			size_t sample = (size_t)bin;
			T v = folded[yi * C + xi];

			fr[sample] += v * alpha;							//sum the weighted value from the folded function
			fA[sample] += alpha;								//sum the weight

			if(sample < X - 1){									//if we aren't dealing with the last bin
				fr[sample + 1] += v * (1 - alpha);				//calculate the weighted value for the second point
				fA[sample + 1] += 1 - alpha;					//add the second alpha value
			}
		}
	}

	//divide out the alpha values (bins that received no weight stay at zero)
	for(size_t i = 0; i < X; i++){
		if(fA[i] != 0)
			fr[i] /= fA[i];
	}

	//free allocated memory
	free(folded);
	free(count);
	free(fA);
}
0 97 \ No newline at end of file
... ...
stim/structures/kdtree.cuh 0 → 100644
// each GPU thread's traversal stack holds stack_size entries (currently 50); increase stack_size if you mean to build a deeper tree
// points are read as h_reference_points[j * D + i] (point j, dimension i),
// so the D coordinates of each point must be stored contiguously:
// x0,y0,z0,x1,y1,z1,x2,y2,z2......
  7 +
  8 +#ifndef KDTREE_H
  9 +#define KDTREE_H
  10 +#define stack_size 50
  11 +
  12 +#include "device_launch_parameters.h"
  13 +#include <cuda.h>
  14 +#include <cuda_runtime_api.h>
  15 +#include "cuda_runtime.h"
  16 +#include <vector>
  17 +#include <cstring>
  18 +#include <float.h>
  19 +#include <iostream>
  20 +#include <algorithm>
  21 +#include <stim/cuda/cudatools/error.h>
  22 +#include <stim/visualization/aabbn.h>
  23 +
  24 +namespace stim {
  25 + namespace kdtree {
  26 + template<typename T, int D> // typename refers to float or double while D refers to dimension of points
  27 + struct point {
  28 + T dim[D]; // create a structure to store every one input point
  29 + };
  30 +
  31 + template<typename T>
  32 + class kdnode {
  33 + public:
  34 + kdnode() { // constructor for initializing a kdnode
  35 + parent = NULL; // set every node's parent, left and right kdnode pointers to NULL
  36 + left = NULL;
  37 + right = NULL;
  38 + parent_idx = -1; // set parent node index to default -1
  39 + left_idx = -1;
  40 + right_idx = -1;
  41 + split_value = -1; // set split_value to default -1
  42 + }
  43 + int idx; // index of current node
  44 + int parent_idx, left_idx, right_idx; // index of parent, left and right nodes
  45 + kdnode *parent, *left, *right; // parent, left and right kdnodes
  46 + T split_value; // splitting value of current node
  47 + std::vector <size_t> indices; // it indicates the points' indices that current node has
  48 + size_t level; // tree level of current node
  49 + };
  50 + } // end of namespace kdtree
  51 +
  52 + template <typename T, int D = 3> // set dimension of data to default 3
  53 + class cpu_kdtree {
  54 + protected:
  55 + int current_axis; // current judging axis
  56 + int n_id; // store the total number of nodes
  57 + std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points
  58 + std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching
  59 + kdtree::kdnode<T> *root; // root node
  60 + static cpu_kdtree<T, D> *cur_tree_ptr;
  61 + public:
  62 + cpu_kdtree() { // constructor for creating a cpu_kdtree
  63 + cur_tree_ptr = this; // create a class pointer points to the current class value
  64 + n_id = 0; // set total number of points to default 0
  65 + }
  66 + ~cpu_kdtree() { // destructor of cpu_kdtree
  67 + std::vector <kdtree::kdnode<T>*> next_nodes;
  68 + next_nodes.push_back(root);
  69 + while (next_nodes.size()) {
  70 + std::vector <kdtree::kdnode<T>*> next_search_nodes;
  71 + while (next_nodes.size()) {
  72 + kdtree::kdnode<T> *cur = next_nodes.back();
  73 + next_nodes.pop_back();
  74 + if (cur->left)
  75 + next_search_nodes.push_back(cur->left);
  76 + if (cur->right)
  77 + next_search_nodes.push_back(cur->right);
  78 + delete cur;
  79 + }
  80 + next_nodes = next_search_nodes;
  81 + }
  82 + root = NULL;
  83 + }
  84 + void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) {
  85 + tmp_points = &reference_points;
  86 + root = new kdtree::kdnode<T>(); // initializing the root node
  87 + root->idx = n_id++; // the index of root is 0
  88 + root->level = 0; // tree level begins at 0
  89 + root->indices.resize(reference_points.size()); // get the number of points
  90 + for (size_t i = 0; i < reference_points.size(); i++) {
  91 + root->indices[i] = i; // set indices of input points
  92 + }
  93 + std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes
  94 + next_nodes.push_back(root); // push back the root node
  95 + while (next_nodes.size()) {
  96 + std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes
  97 + while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search
  98 + kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first)
  99 + next_nodes.pop_back(); // pop out current node in order to store next round of nodes
  100 + if (current_node->level < max_levels) {
  101 + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point
  102 + kdtree::kdnode<T> *left = new kdtree::kdnode<T>();
  103 + kdtree::kdnode<T> *right = new kdtree::kdnode<T>();
  104 + left->idx = n_id++; // set the index of current node's left node
  105 + right->idx = n_id++;
  106 + split(current_node, left, right); // split left and right and determine a node
  107 + std::vector <size_t> temp; // empty vecters of int
  108 + //temp.resize(current_node->indices.size());
  109 + current_node->indices.swap(temp); // clean up current node's indices
  110 + current_node->left = left;
  111 + current_node->right = right;
  112 + current_node->left_idx = left->idx;
  113 + current_node->right_idx = right->idx;
  114 + if (right->indices.size())
  115 + next_search_nodes.push_back(right); // left pop out first
  116 + if (left->indices.size())
  117 + next_search_nodes.push_back(left);
  118 + }
  119 + }
  120 + }
  121 + next_nodes = next_search_nodes; // go deeper within the tree
  122 + }
  123 + }
  124 + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort
  125 + std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer
  126 + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis];
  127 + }
  128 + void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) {
  129 + std::vector < typename kdtree::point<T, D> > &pts = *tmp_points;
  130 + current_axis = cur->level % D; // indicate the judicative dimension or axis
  131 + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data
  132 + size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor
  133 + cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node
  134 + left->parent = cur; // set the parent of the next search nodes to current node
  135 + right->parent = cur;
  136 + left->level = cur->level + 1; // level + 1
  137 + right->level = cur->level + 1;
  138 + left->parent_idx = cur->idx; // set its parent node's index
  139 + right->parent_idx = cur->idx;
  140 + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one
  141 + size_t idx = cur->indices[i];
  142 + if (pts[idx].dim[current_axis] < cur->split_value)
  143 + left->indices.push_back(idx);
  144 + else
  145 + right->indices.push_back(idx);
  146 + }
  147 + }
  148 + void create(T *h_reference_points, size_t reference_count, size_t max_levels) {
  149 + std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way
  150 + for (size_t j = 0; j < reference_count; j++)
  151 + for (size_t i = 0; i < D; i++)
  152 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  153 + cpu_create(reference_points, max_levels);
  154 + cpu_tmp_points = *tmp_points;
  155 + }
  156 + int get_num_nodes() const { // get the total number of nodes
  157 + return n_id;
  158 + }
  159 + kdtree::kdnode<T>* get_root() const { // get the root node of tree
  160 + return root;
  161 + }
  162 + T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) {
  163 + T distance = 0;
  164 +
  165 + for (size_t i = 0; i < D; i++) {
  166 + T d = a.dim[i] - b.dim[i];
  167 + distance += d*d;
  168 + }
  169 + return distance;
  170 + }
  171 + void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) {
  172 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  173 + size_t best_index = 0;
  174 + std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
  175 + while (true) {
  176 + size_t split_axis = cur->level % D;
  177 + if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace
  178 + *node = cur; // pointer points to a pointer
  179 + for (size_t i = 0; i < cur->indices.size(); i++) {
  180 + size_t idx = cur->indices[i];
  181 + T d = cpu_distance(query, pts[idx]); // compute distances
  182 + /// if we want to compute k nearest neighbor, we can input the last resul
  183 + /// (last_best_dist < dist < best_dist) to select the next point until reaching to k
  184 + if (d < best_distance) {
  185 + best_distance = d;
  186 + best_index = idx; // record the nearest neighbor index
  187 + }
  188 + }
  189 + break; // find the target point then break the loop
  190 + }
  191 + else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side
  192 + cur = cur->left;
  193 + }
  194 + else {
  195 + cur = cur->right;
  196 + }
  197 + }
  198 + *index = best_index;
  199 + *distance = best_distance;
  200 + }
  201 + void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) {
  202 + T best_distance = FLT_MAX; // initialize the best distance to max of floating point
  203 + size_t best_index = 0;
  204 + std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points;
  205 + std::vector < typename kdtree::kdnode<T>*> next_node;
  206 + next_node.push_back(cur);
  207 + while (next_node.size()) {
  208 + std::vector<typename kdtree::kdnode<T>*> next_search;
  209 + while (next_node.size()) {
  210 + cur = next_node.back();
  211 + next_node.pop_back();
  212 + size_t split_axis = cur->level % D;
  213 + if (cur->left == NULL) {
  214 + for (size_t i = 0; i < cur->indices.size(); i++) {
  215 + size_t idx = cur->indices[i];
  216 + T d = cpu_distance(query, pts[idx]);
  217 + if (d < best_distance) {
  218 + best_distance = d;
  219 + best_index = idx;
  220 + }
  221 + }
  222 + }
  223 + else {
  224 + T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension
  225 + /// there are three possibilities: on either left or right, and on both left and right
  226 + if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist
  227 + if (d < 0)
  228 + next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region
  229 + else
  230 + next_search.push_back(cur->right);
  231 + }
  232 + else { // it is possible that nereast neighbor will appear on both left and right
  233 + next_search.push_back(cur->left);
  234 + next_search.push_back(cur->right);
  235 + }
  236 + }
  237 + }
  238 + next_node = next_search; // pop out at least one time
  239 + }
  240 + *index = best_index;
  241 + *distance = best_distance;
  242 + }
  243 + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) {
  244 + /// first convert the input query point into specific type
  245 + kdtree::point<T, D> query;
  246 + for (size_t j = 0; j < query_count; j++) {
  247 + for (size_t i = 0; i < D; i++)
  248 + query.dim[i] = h_query_points[j * D + i];
  249 + /// find the nearest node, this will be the upper bound for the next time searching
  250 + kdtree::kdnode<T> *best_node = NULL;
  251 + T best_distance = FLT_MAX;
  252 + size_t best_index = 0;
  253 + T radius = 0; // radius for range
  254 + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step
  255 + radius = sqrt(best_distance); // It is possible that nearest will appear in another region
  256 + /// find other possibilities
  257 + kdtree::kdnode<T> *cur = best_node;
  258 + while (cur->parent != NULL) { // every node that you pass will be possible to be the best node
  259 + /// go up
  260 + kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through
  261 + size_t split_axis = (parent->level) % D;
  262 + /// search other nodes
  263 + size_t tmp_index;
  264 + T tmp_distance = FLT_MAX;
  265 + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) {
  266 + /// search opposite node
  267 + if (parent->left != cur)
  268 + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node
  269 + else
  270 + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance);
  271 + }
  272 + if (tmp_distance < best_distance) {
  273 + best_distance = tmp_distance;
  274 + best_index = tmp_index;
  275 + }
  276 + cur = parent;
  277 + }
  278 + h_indices[j] = best_index;
  279 + h_distances[j] = best_distance;
  280 + }
  281 + }
  282 + }; //end class kdtree
  283 +
  284 + template <typename T, int D>
  285 + cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class
  286 +
  287 + template <typename T>
  288 + struct cuda_kdnode {
  289 + int parent, left, right;
  290 + T split_value;
  291 + size_t num_index; // number of indices it has
  292 + int index; // the beginning index
  293 + size_t level;
  294 + };
  295 +
  296 + template <typename T, int D>
  297 + __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) {
  298 + T distance = 0;
  299 +
  300 + for (size_t i = 0; i < D; i++) {
  301 + T d = a.dim[i] - b.dim[i];
  302 + distance += d*d;
  303 + }
  304 + return distance;
  305 + }
  306 + template <typename T, int D>
  307 + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) {
  308 + T best_distance = FLT_MAX;
  309 + size_t best_index = 0;
  310 +
  311 + while (true) { // break until reach the bottom
  312 + int split_axis = nodes[cur].level % D;
  313 + if (nodes[cur].left == -1) { // check whether it has left node or not
  314 + *d_node = cur;
  315 + for (int i = 0; i < nodes[cur].num_index; i++) {
  316 + size_t idx = indices[nodes[cur].index + i];
  317 + T dist = gpu_distance<T, D>(d_query_point, d_reference_points[idx]);
  318 + if (dist < best_distance) {
  319 + best_distance = dist;
  320 + best_index = idx;
  321 + }
  322 + }
  323 + break;
  324 + }
  325 + else if (d_query_point.dim[split_axis] < nodes[cur].split_value) { // jump into specific son node
  326 + cur = nodes[cur].left;
  327 + }
  328 + else {
  329 + cur = nodes[cur].right;
  330 + }
  331 + }
  332 + *d_distance = best_distance;
  333 + *d_index = best_index;
  334 + }
  335 + template <typename T, int D>
  336 + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
  337 + T best_distance = FLT_MAX;
  338 + size_t best_index = 0;
  339 +
  340 + int next_nodes_pos = 0; // initialize pop out order index
  341 + next_nodes[id * stack_size + next_nodes_pos] = cur; // find data that belongs to the very specific thread
  342 + next_nodes_pos++;
  343 +
  344 + while (next_nodes_pos) {
  345 + int next_search_nodes_pos = 0; // record push back order index
  346 + while (next_nodes_pos) {
  347 + cur = next_nodes[id * stack_size + next_nodes_pos - 1]; // pop out the last push in one and keep poping out
  348 + next_nodes_pos--;
  349 + int split_axis = nodes[cur].level % D;
  350 +
  351 + if (nodes[cur].left == -1) {
  352 + for (int i = 0; i < nodes[cur].num_index; i++) {
  353 + int idx = indices[nodes[cur].index + i]; // all indices are stored in one array, pick up from every node's beginning index
  354 + T d = gpu_distance<T>(d_query_point, d_reference_points[idx]);
  355 + if (d < best_distance) {
  356 + best_distance = d;
  357 + best_index = idx;
  358 + }
  359 + }
  360 + }
  361 + else {
  362 + T d = d_query_point.dim[split_axis] - nodes[cur].split_value;
  363 +
  364 + if (fabs(d) > range) {
  365 + if (d < 0) {
  366 + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left;
  367 + next_search_nodes_pos++;
  368 + }
  369 + else {
  370 + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right;
  371 + next_search_nodes_pos++;
  372 + }
  373 + }
  374 + else {
  375 + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right;
  376 + next_search_nodes_pos++;
  377 + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left;
  378 + next_search_nodes_pos++;
  379 + if (next_search_nodes_pos > stack_size) {
  380 + printf("Thread conflict might be caused by thread %d, so please try smaller input max_tree_levels\n", id);
  381 + (*Judge)++;
  382 + }
  383 + }
  384 + }
  385 + }
  386 + for (int i = 0; i < next_search_nodes_pos; i++)
  387 + next_nodes[id * stack_size + i] = next_search_nodes[id * stack_size + i];
  388 + next_nodes_pos = next_search_nodes_pos;
  389 + }
  390 + *d_distance = best_distance;
  391 + *d_index = best_index;
  392 + }
  393 + template <typename T, int D>
  394 + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) {
  395 + int best_node = 0;
  396 + T best_distance = FLT_MAX;
  397 + size_t best_index = 0;
  398 + T radius = 0;
  399 +
  400 + search_at_node<T, D>(nodes, indices, d_reference_points, 0, d_query_point, &best_index, &best_distance, &best_node);
  401 + radius = sqrt(best_distance); // get range
  402 + int cur = best_node;
  403 +
  404 + while (nodes[cur].parent != -1) {
  405 + int parent = nodes[cur].parent;
  406 + int split_axis = nodes[parent].level % D;
  407 +
  408 + T tmp_dist = FLT_MAX;
  409 + size_t tmp_idx;
  410 + if (fabs(nodes[parent].split_value - d_query_point.dim[split_axis]) <= radius) {
  411 + if (nodes[parent].left != cur)
  412 + search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].left, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge);
  413 + else
  414 + search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].right, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge);
  415 + }
  416 + if (tmp_dist < best_distance) {
  417 + best_distance = tmp_dist;
  418 + best_index = tmp_idx;
  419 + }
  420 + cur = parent;
  421 + }
  422 + *d_distance = sqrt(best_distance);
  423 + *d_index = best_index;
  424 + }
  425 + template <typename T, int D>
  426 + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) {
  427 + size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
  428 + if (idx >= d_query_count) return; // avoid segfault
  429 +
  430 + search<T, D>(nodes, indices, d_reference_points, d_query_points[idx], &d_indices[idx], &d_distances[idx], idx, next_nodes, next_search_nodes, Judge); // every query points are independent
  431 + }
  432 +
  433 + template <typename T, int D = 3>
  434 + class cuda_kdtree {
  435 + protected:
  436 + cuda_kdnode<T> *d_nodes;
  437 + size_t *d_index;
  438 + kdtree::point<T, D>* d_reference_points;
  439 + size_t npts;
  440 + int num_nodes;
  441 + public:
  442 + ~cuda_kdtree() {
  443 + HANDLE_ERROR(cudaFree(d_nodes));
  444 + HANDLE_ERROR(cudaFree(d_index));
  445 + HANDLE_ERROR(cudaFree(d_reference_points));
  446 + }
  447 +
  448 + /// Build a KD-tree on the CPU from a host array of reference points, then flatten it and copy it to the GPU
  449 + /// @param h_reference_points is a host array of reference_count * D interleaved coordinates in (x0, y0, z0, ..., x1, y1, z1, ...) order
  450 + /// @param reference_count is the number of reference points in the array
  451 + /// @param max_levels is the maximum number of tree levels handed to the CPU builder; values above 10 print a message and terminate the program
  452 + void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) {
  453 + if (max_levels > 10) { // refuse trees deeper than 10 levels
  454 + std::cout<<"The max_tree_levels should be smaller!"<<std::endl;
  455 + exit(1); // NOTE: ends the whole process instead of reporting the problem to the caller
  456 + }
  457 + //bb.init(&h_reference_points[0]);
  458 + //aaboundingboxing<T, D>(bb, h_reference_points, reference_count);
  459 +
  460 + std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // repack the flat coordinate array into an array of D-dimensional points
  461 + for (size_t j = 0; j < reference_count; j++)
  462 + for (size_t i = 0; i < D; i++)
  463 + reference_points[j].dim[i] = h_reference_points[j * D + i];
  464 + cpu_kdtree<T, D> tree; // host-side tree, only needed while building
  465 + tree.cpu_create(reference_points, max_levels); // build the tree on the CPU
  466 + kdtree::kdnode<T> *d_root = tree.get_root(); // root of the CPU tree (dereferenced on the host below, despite the d_ prefix)
  467 + num_nodes = tree.get_num_nodes();
  468 + npts = reference_count; // number of reference points held by this tree
  469 +
  470 + HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // allocate device storage for the flattened nodes, the index list and the points
  471 + HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts)); // NOTE(review): buffers from an earlier create() call are not freed before being replaced
  472 + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts));
  473 +
  474 + std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes); // host staging copy of the flattened tree, addressed by node idx
  475 + std::vector <size_t> indices(npts); // host staging copy of the per-node reference point indices
  476 + std::vector <kdtree::kdnode<T>*> next_nodes; // nodes of the level currently being flattened
  477 + size_t cur_pos = 0; // next free position in indices
  478 + next_nodes.push_back(d_root);
  479 + while (next_nodes.size()) { // walk the CPU tree one level at a time
  480 + std::vector <typename kdtree::kdnode<T>*> next_search_nodes; // children gathered for the following level
  481 + while (next_nodes.size()) {
  482 + kdtree::kdnode<T> *cur = next_nodes.back();
  483 + next_nodes.pop_back();
  484 + int id = cur->idx; // position of this node in the flattened array
  485 + tmp_nodes[id].level = cur->level;
  486 + tmp_nodes[id].parent = cur->parent_idx;
  487 + tmp_nodes[id].left = cur->left_idx;
  488 + tmp_nodes[id].right = cur->right_idx;
  489 + tmp_nodes[id].split_value = cur->split_value;
  490 + tmp_nodes[id].num_index = cur->indices.size(); // number of reference point indices stored in this node
  491 + if (cur->indices.size()) {
  492 + for (size_t i = 0; i < cur->indices.size(); i++)
  493 + indices[cur_pos + i] = cur->indices[i];
  494 +
  495 + tmp_nodes[id].index = (int)cur_pos; // offset of this node's first entry in indices
  496 + cur_pos += cur->indices.size(); // index lists of successive nodes are packed back to back
  497 + }
  498 + else {
  499 + tmp_nodes[id].index = -1; // this node stores no indices
  500 + }
  501 +
  502 + if (cur->left)
  503 + next_search_nodes.push_back(cur->left);
  504 +
  505 + if (cur->right)
  506 + next_search_nodes.push_back(cur->right);
  507 + }
  508 + next_nodes = next_search_nodes; // continue with the next level
  509 + }
  510 + HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice)); // upload the flattened nodes
  511 + HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice)); // upload the packed index list
  512 + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_points.size(), cudaMemcpyHostToDevice)); // upload the reference points
  513 + }
  514 +
  515 + /// Search the KD tree for nearest neighbors to a set of specified query points
  516 + /// @param h_query_points an array of query points in (x0, y0, z0, ...) order
  517 + /// @param query_count is the number of query points
  518 + /// @param indices are the indices to the nearest reference point for each query points
  519 + /// @param distances is an array containing the distance between each query point and the nearest reference point
  520 + void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) {
  521 + std::vector < typename kdtree::point<T, D> > query_points(query_count);
  522 + for (size_t j = 0; j < query_count; j++)
  523 + for (size_t i = 0; i < D; i++)
  524 + query_points[j].dim[i] = h_query_points[j * D + i];
  525 +
  526 + unsigned int threads = (unsigned int)(query_points.size() > 1024 ? 1024 : query_points.size());
  527 + unsigned int blocks = (unsigned int)(query_points.size() / threads + (query_points.size() % threads ? 1 : 0));
  528 +
  529 + kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu
  530 + size_t *d_indices;
  531 + T *d_distances;
  532 +
  533 + int *next_nodes; // create two STACK-like array
  534 + int *next_search_nodes;
  535 +
  536 + int *Judge = NULL; // judge variable to see whether one thread is overwrite another thread's memory
  537 +
  538 + HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D));
  539 + HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size()));
  540 + HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size()));
  541 + HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * stack_size * sizeof(int))); // STACK size right now is 50, you can change it if you mean to
  542 + HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * stack_size * sizeof(int)));
  543 + HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice));
  544 +
  545 + search_batch<<<blocks, threads>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge);
  546 +
  547 + if (Judge == NULL) { // do the following work if the thread works safely
  548 + HANDLE_ERROR(cudaMemcpy(indices, d_indices, sizeof(size_t) * query_points.size(), cudaMemcpyDeviceToHost));
  549 + HANDLE_ERROR(cudaMemcpy(distances, d_distances, sizeof(T) * query_points.size(), cudaMemcpyDeviceToHost));
  550 + }
  551 +
  552 + HANDLE_ERROR(cudaFree(next_nodes));
  553 + HANDLE_ERROR(cudaFree(next_search_nodes));
  554 + HANDLE_ERROR(cudaFree(d_query_points));
  555 + HANDLE_ERROR(cudaFree(d_indices));
  556 + HANDLE_ERROR(cudaFree(d_distances));
  557 + }
  558 +
  559 + /// Return the number of reference points in the KD tree (the reference_count given to create())
  560 + size_t num_points() {
  561 + return npts; // assigned in create()
  562 + }
  563 +
  564 + stim::aabbn<T, D> getbox() {
  565 + size_t N = npts;
  566 + //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points
  567 + T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points
  568 + HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU
  569 +
  570 + stim::aabbn<T, D> bb(cpu_ref);
  571 +
  572 + for (size_t i = 1; i < N; i++) { //for each reference point
  573 + //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl;
  574 + bb.insert(&cpu_ref[i * D]);
  575 + }
  576 + return bb;
  577 + }
  578 +
  579 + //generate an implicit distance field for the KD-tree
  580 + void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) {
  581 + size_t N = 1; //number of query points that make up the distance field
  582 + for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points
  583 +
  584 + //calculate the grid spatial parameters
  585 + T dx = 0;
  586 + if (dims[0] > 1) dx = bb.length(0) / dims[0];
  587 + T dy = 0;
  588 + if (dims[1] > 1) dy = bb.length(1) / dims[1];
  589 + T dz = 0;
  590 + if (dims[2] > 1) dz = bb.length(2) / dims[2];
  591 +
  592 + T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points
  593 + size_t i;
  594 + for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid)
  595 + for (size_t y = 0; y < dims[1]; y++) {
  596 + for (size_t x = 0; x < dims[0]; x++) {
  597 + i = z * dims[1] * dims[0] + y * dims[0] + x;
  598 + Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2;
  599 + Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2;
  600 + Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2;
  601 + //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl;
  602 + }
  603 + }
  604 + }
  605 + size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused)
  606 + search(Q, N, temp, dist);
  607 + }
  608 +
  609 + //generate an implicit distance field for the KD-tree, sampled over the bounding box returned by getbox()
  610 + void dist_field3(T* dist, size_t* dims) {
  611 + stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree
  612 + dist_field3(dist, dims, bb); //NOTE(review): the overload called here takes stim::aabbn<T, 3>, so this presumably only compiles when D == 3 -- confirm
  613 + }
  614 +
  615 + };
  616 +} //end namespace stim
  617 +#endif
0 618 \ No newline at end of file
... ...
stim/util/filesize.h 0 → 100644
  1 +#ifndef STIM_UTIL_FILESIZE_H
  2 +#define STIM_UTIL_FILESIZE_H
  3 +
  4 +#ifdef _WIN32
  5 +#include <Windows.h>
  6 +#else
  7 +#include <sys/types.h>
  8 +#include <sys/stat.h>
  9 +#endif
  10 +
  11 +namespace stim{
  12 +static size_t file_size(std::string filename){
  13 +#ifdef _WIN32
  14 + HANDLE hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
  15 + if(hFile == INVALID_HANDLE_VALUE) return 0;
  16 + LARGE_INTEGER size;
  17 + if(!GetFileSizeEx(hFile, &size)){
  18 + CloseHandle(hFile);
  19 + return 0;
  20 + }
  21 + CloseHandle(hFile);
  22 + return (size_t)size.QuadPart;
  23 +#else
  24 + struct stat sb;
  25 + stat(filename.c_str(), &sb);
  26 + return sb.st_size;
  27 +#endif
  28 +}
  29 +
  30 +} //end namespace stim
  31 +
  32 +
  33 +
  34 +#endif
... ...
stim/visualization/aabb3.h
... ... @@ -2,51 +2,31 @@
2 2 #define STIM_AABB3_H
3 3  
4 4 #include <stim/cuda/cudatools/callable.h>
  5 +#include <stim/visualization/aabbn.h>
5 6  
6 7 namespace stim{
7 8  
8   -/// Structure for a 3D axis aligned bounding box
  9 + template<typename T>
  10 + using aabb3 = aabbn<T, 3>;
  11 +/*/// Structure for a 3D axis aligned bounding box
9 12 template<typename T>
10   -struct aabb3{
11   -
12   -//protected:
13   -
14   - T low[3]; //top left corner position
15   - T high[3]; //dimensions along x and y and z
16   -
17   -//public:
18   -
19   - CUDA_CALLABLE aabb3(T x, T y, T z){ //initialize an axis aligned bounding box of size 0 at the given position
20   - low[0] = high[0] = x; //set the position to the user specified coordinates
21   - low[1] = high[1] = y;
22   - low[2] = high[2] = z;
  13 +struct aabb3 : public aabbn<T, 3>{
  14 +
  15 + aabb3() : aabbn() {}
  16 + aabb3(T x0, T y0, T z0, T x1, T y1, T z1){
  17 + low[0] = x0;
  18 + low[1] = y0;
  19 + low[2] = z0;
  20 + high[0] = x0;
  21 + high[1] = x1;
  22 + high[2] = x2;
23 23 }
24 24  
25   - //insert a point into the bounding box, growing the box appropriately
26   - CUDA_CALLABLE void insert(T x, T y, T z){
27   - if(x < low[0]) low[0] = x;
28   - if(y < low[1]) low[1] = y;
29   - if(z < low[2]) low[2] = z;
30   -
31   - if(x > high[0]) high[0] = x;
32   - if(y > high[1]) high[1] = y;
33   - if(z > high[2]) high[2] = z;
34   - }
35   -
36   - //trim the bounding box so that the lower bounds are (x, y, z)
37   - CUDA_CALLABLE void trim_low(T x, T y, T z){
38   - if(low[0] < x) low[0] = x;
39   - if(low[1] < y) low[1] = y;
40   - if(low[2] < z) low[2] = z;
41   - }
  25 + aabb3 aabbn<T, 3>() {
42 26  
43   - CUDA_CALLABLE void trim_high(T x, T y, T z){
44   - if(high[0] > x) high[0] = x;
45   - if(high[1] > y) high[1] = y;
46   - if(high[2] > z) high[2] = z;
47 27 }
48 28  
49   -};
  29 +};*/
50 30  
51 31 }
52 32  
... ...
stim/visualization/aabbn.h 0 → 100644
  1 +#ifndef STIM_AABBN_H
  2 +#define STIM_AABBN_H
  3 +
  4 +#include <vector>
  5 +#include <stim/cuda/cudatools/callable.h>
  6 +
  7 +namespace stim{
  8 +
  9 +/// Structure for a 3D axis aligned bounding box
  10 +template<typename T, size_t D>
  11 +struct aabbn{
  12 +
  13 +//protected:
  14 +
  15 + T low[D]; //top left corner position
  16 + T high[D]; //dimensions along x and y and z
  17 +
  18 + CUDA_CALLABLE void init(T* i) {
  19 + for (size_t d = 0; d < D; d++)
  20 + low[d] = high[d] = i[d];
  21 + }
  22 +
  23 + CUDA_CALLABLE aabbn() {}
  24 + CUDA_CALLABLE aabbn(T* i) {
  25 + init(i);
  26 + }
  27 +
  28 + CUDA_CALLABLE aabbn(T x0, T x1) {
  29 + low[0] = x0;
  30 + high[0] = x1;
  31 + }
  32 +
  33 + CUDA_CALLABLE aabbn(T x0, T y0, T x1, T y1) : aabbn(x0, x1) {
  34 + low[1] = y0;
  35 + high[1] = y1;
  36 + }
  37 +
  38 + CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) : aabbn(x0, y0, x1, y1) {
  39 + low[2] = z0;
  40 + high[2] = z1;
  41 + }
  42 +
  43 +
  44 + //insert a point into the bounding box, growing the box appropriately
  45 + CUDA_CALLABLE void insert(T* p){
  46 + for(size_t d = 0; d < D; d++){
  47 + if(p[d] < low[d]) low[d] = p[d];
  48 + if(p[d] > high[d]) high[d] = p[d];
  49 + }
  50 + }
  51 +
  52 + //trim the bounding box so that the lower bounds are b(x, y, z, ...)
  53 + CUDA_CALLABLE void trim_low(T* b){
  54 + for(size_t d = 0; d < D; d++)
  55 + if(low[d] < b[d]) low[d] = b[d];
  56 + }
  57 +
  58 + CUDA_CALLABLE void trim_high(T* b){
  59 + for(size_t d = 0; d < D; d++)
  60 + if(low[d] > b[d]) low[d] = b[d];
  61 + }
  62 +
  63 + CUDA_CALLABLE T length(size_t d) {
  64 + return high[d] - low[d];
  65 + }
  66 +
  67 + CUDA_CALLABLE aabbn<T, D> operator*(T s) {
  68 + aabbn<T, D> newbox;
  69 + for (size_t d = 0; d < D; d++) {
  70 + T c = (low[d] + high[d]) / 2;
  71 + T l = high[d] - low[d];
  72 + newbox.low[d] = c - l * s / 2;
  73 + newbox.high[d] = c + l * s / 2;
  74 + }
  75 + return newbox;
  76 + }
  77 +
  78 + //translate the box along dimension d a distance of v
  79 + CUDA_CALLABLE void translate(size_t d, T v) {
  80 + for (size_t d = 0; d < D; d++) {
  81 + low[d] += v;
  82 + high[d] += v;
  83 + }
  84 + }
  85 +
  86 +};
  87 +
  88 +}
  89 +
  90 +
  91 +#endif
0 92 \ No newline at end of file
... ...
stim/visualization/cylinder.h
... ... @@ -4,6 +4,9 @@
4 4 #include <stim/math/circle.h>
5 5 #include <stim/biomodels/centerline.h>
6 6  
  7 +/*
  8 +
  9 +*/
7 10  
8 11 namespace stim
9 12 {
... ... @@ -12,13 +15,13 @@ class cylinder
12 15 : public centerline<T>
13 16 {
14 17 private:
15   - stim::circle<T> s; //an arbitrary circle
16   - std::vector<stim::circle<T> > e; //an array of circles that store the centerline
  18 + stim::circle<T> s; //an arbitrary circle
  19 + std::vector<stim::circle<T> > e; //an array of circles that store the centerline
17 20  
18 21 std::vector<stim::vec3<T> > norms;
19 22 std::vector<stim::vec<T> > Us;
20   - std::vector<stim::vec<T> > mags;
21   - std::vector< T > L; //length of the cylinder at each position.
  23 + std::vector<stim::vec<T> > mags; //stores a list of magnitudes for each point in the centerline (assuming mags[0] is the radius)
  24 + std::vector< T > L; //length of the cylinder at each position (pre-integration)
22 25  
23 26  
24 27 using stim::centerline<T>::c;
... ... @@ -61,9 +64,9 @@ class cylinder
61 64 return;
62 65  
63 66 //calculate each L.
64   - L.resize(inP.size());
65   - T temp = (T)0;
66   - L[0] = 0;
  67 + L.resize(inP.size()); //the number of precomputed lengths will equal the number of points
  68 + T temp = (T)0; //length up to that point
  69 + L[0] = temp;
67 70 for(size_t i = 1; i < L.size(); i++)
68 71 {
69 72 temp += (inP[i-1] - inP[i]).len();
... ... @@ -234,7 +237,7 @@ class cylinder
234 237 cylinder(std::vector< stim::vec3<T> > inP)
235 238 : centerline<T>(inP)
236 239 {
237   - std::vector< T > inM; //create an array of arbitrary magnitudes
  240 + std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes
238 241  
239 242 stim::vec<T> zero;
240 243 zero.push_back(0);
... ... @@ -476,30 +479,30 @@ class cylinder
476 479  
477 480 std::vector< vec3<T> > result;
478 481  
479   - vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline
  482 + vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline
480 483 vec3<T> p1;
481   - unsigned N = size(); //number of points in the current centerline
  484 + unsigned N = size(); //number of points in the current centerline
482 485  
483 486 //for each line segment on the centerline
484 487 for(unsigned int i = 1; i < N; i++){
485   - p1 = e[i].P; //get the second point in the line segment
  488 + p1 = e[i].P; //get the second point in the line segment
486 489  
487   - vec3<T> v = p1 - p0; //calculate the vector between these two points
488   - T d = v.len(); //calculate the distance between these two points (length of the line segment)
  490 + vec3<T> v = p1 - p0; //calculate the vector between these two points
  491 + T d = v.len(); //calculate the distance between these two points (length of the line segment)
489 492  
490 493 size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria
491   - T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points
  494 + T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points
492 495  
493 496 //for each step along the line segment
494 497 for(unsigned s = 0; s < nsteps; s++){
495   - T alpha = stepsize * s; //calculate the fraction of the distance along the line segment covered
496   - result.push_back(p0 + alpha * v); //push the point at alpha position along the line segment
  498 + T alpha = stepsize * s; //calculate the fraction of the distance along the line segment covered
  499 + result.push_back(p0 + alpha * v); //push the point at alpha position along the line segment
497 500 }
498 501  
499   - p0 = p1; //shift the points to move to the next line segment
  502 + p0 = p1; //shift the points to move to the next line segment
500 503 }
501 504  
502   - result.push_back(e[size() - 1].P); //push the last point in the centerline
  505 + result.push_back(e[size() - 1].P); //push the last point in the centerline
503 506  
504 507 return cylinder<T>(result);
505 508  
... ...