Commit 39a92d0390bbd9240b2493d5a98afd9f17f0633b
Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib into Graph
Showing
51 changed files
with
5615 additions
and
339 deletions
Show diff stats
1 | +# | ||
2 | +# Windows users: define the FANN_PATH environment variable to point | ||
3 | +# to the directory containing: | ||
4 | +# include/fann.h | ||
5 | +# lib/*fann.lib | ||
6 | + | ||
7 | + | ||
8 | +# FANN_FOUND - system has fann | ||
9 | +# FANN_INCLUDE_DIRS - the fann include directory | ||
10 | +# FANN_LIBRARIES - Link these to use fann | ||
11 | +# FANN_DEFINITIONS - Compiler switches required for using fann | ||
12 | +# | ||
13 | + | ||
14 | +if(FANN_LIBRARIES AND FANN_INCLUDE_DIRS) | ||
15 | + set(FANN_FOUND TRUE) | ||
16 | +else() | ||
17 | + find_path(FANN_INCLUDE_DIR | ||
18 | + NAMES | ||
19 | + fann.h | ||
20 | + PATHS | ||
21 | + $ENV{FANN_PATH}/include | ||
22 | + ${FANN_DIR}/include | ||
23 | + /usr/include | ||
24 | + /usr/local/include | ||
25 | + /opt/local/include | ||
26 | + /sw/include | ||
27 | + ) | ||
28 | + | ||
29 | + set( _libraries fann doublefann fixedfann floatfann ) | ||
30 | + | ||
31 | + foreach( _lib ${_libraries} ) | ||
32 | + string( TOUPPER ${_lib} _name ) | ||
33 | + | ||
34 | + find_library(${_name}_LIBRARY | ||
35 | + NAMES | ||
36 | + ${_lib} | ||
37 | + PATHS | ||
38 | + $ENV{FANN_PATH}/lib | ||
39 | + ${FANN_DIR}/lib | ||
40 | + /usr/lib | ||
41 | + /usr/local/lib | ||
42 | + /opt/local/lib | ||
43 | + /sw/lib | ||
44 | + ) | ||
45 | + | ||
46 | + endforeach() | ||
47 | + | ||
48 | + | ||
49 | + set(FANN_INCLUDE_DIRS | ||
50 | + ${FANN_INCLUDE_DIR} | ||
51 | + ) | ||
52 | + | ||
53 | + set(FANN_LIBRARIES | ||
54 | + ${FANN_LIBRARIES} | ||
55 | + ${FANN_LIBRARY} | ||
56 | + ${DOUBLEFANN_LIBRARY} | ||
57 | + ${FIXEDFANN_LIBRARY} | ||
58 | + ${FLOATFANN_LIBRARY} | ||
59 | + ) | ||
60 | + | ||
61 | + if( UNIX ) | ||
62 | + set( FANN_LIBRARIES ${FANN_LIBRARIES} m ) | ||
63 | + endif() | ||
64 | + | ||
65 | + if(FANN_INCLUDE_DIRS AND FANN_LIBRARIES) | ||
66 | + set(FANN_FOUND TRUE) | ||
67 | + endif() | ||
68 | + | ||
69 | + if(FANN_FOUND) | ||
70 | + if(NOT FANN_FIND_QUIETLY) | ||
71 | + message(STATUS "Found FANN:") | ||
72 | + message(STATUS "FANN_INCLUDE_DIRS: ${FANN_INCLUDE_DIRS}") | ||
73 | + message(STATUS "FANN_LIBRARIES: ${FANN_LIBRARIES}") | ||
74 | + endif() | ||
75 | + else() | ||
76 | + if(FANN_FIND_REQUIRED) | ||
77 | + message(FATAL_ERROR "Could not find FANN") | ||
78 | + endif() | ||
79 | + endif() | ||
80 | + | ||
81 | + mark_as_advanced(FANN_INCLUDE_DIRS FANN_LIBRARIES) | ||
82 | +endif() |
1 | +# Copyright (c) 2012-2016 DreamWorks Animation LLC | ||
2 | +# | ||
3 | +# All rights reserved. This software is distributed under the | ||
4 | +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ ) | ||
5 | +# | ||
6 | +# Redistributions of source code must retain the above copyright | ||
7 | +# and license notice and the following restrictions and disclaimer. | ||
8 | +# | ||
9 | +# * Neither the name of DreamWorks Animation nor the names of | ||
10 | +# its contributors may be used to endorse or promote products derived | ||
11 | +# from this software without specific prior written permission. | ||
12 | +# | ||
13 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
14 | +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
15 | +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
16 | +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
17 | +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL, | ||
18 | +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
19 | +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
20 | +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
21 | +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
22 | +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
23 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
24 | +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE | ||
25 | +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00. | ||
26 | +# | ||
27 | + | ||
28 | +#-*-cmake-*- | ||
29 | +# - Find GLEW | ||
30 | +# | ||
31 | +# Author : Nicholas Yue yue.nicholas@gmail.com | ||
32 | +# | ||
33 | +# This auxiliary CMake file helps find the GLEW headers and libraries | ||
34 | +# | ||
35 | +# GLEW_FOUND set if Glew is found. | ||
36 | +# GLEW_INCLUDE_DIR GLEW's include directory | ||
37 | +# GLEW_GLEW_LIBRARY GLEW libraries | ||
38 | +# GLEW_GLEWmx_LIBRARY GLEWmx libraries (Multiple Rendering Context) | ||
39 | + | ||
40 | +FIND_PACKAGE ( PackageHandleStandardArgs ) | ||
41 | + | ||
42 | +FIND_PATH( GLEW_LOCATION include/GL/glew.h | ||
43 | + "$ENV{GLEW_ROOT}" | ||
44 | + NO_DEFAULT_PATH | ||
45 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
46 | + ) | ||
47 | + | ||
48 | +FIND_PACKAGE_HANDLE_STANDARD_ARGS ( GLEW | ||
49 | + REQUIRED_VARS GLEW_LOCATION | ||
50 | + ) | ||
51 | + | ||
52 | +IF ( GLEW_LOCATION ) | ||
53 | + | ||
54 | + SET( GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path") | ||
55 | + | ||
56 | + SET ( ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) | ||
57 | + IF (GLEW_USE_STATIC_LIBS) | ||
58 | + IF (APPLE) | ||
59 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a") | ||
60 | + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib | ||
61 | + NO_DEFAULT_PATH | ||
62 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
63 | + ) | ||
64 | + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib | ||
65 | + NO_DEFAULT_PATH | ||
66 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
67 | + ) | ||
68 | + # MESSAGE ( "APPLE STATIC" ) | ||
69 | + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} ) | ||
70 | + ELSEIF (WIN32) | ||
71 | + # Link library | ||
72 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib") | ||
73 | + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib ) | ||
74 | + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib ) | ||
75 | + ELSE (APPLE) | ||
76 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a") | ||
77 | + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib | ||
78 | + NO_DEFAULT_PATH | ||
79 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
80 | + ) | ||
81 | + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib | ||
82 | + NO_DEFAULT_PATH | ||
83 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
84 | + ) | ||
85 | + # MESSAGE ( "LINUX STATIC" ) | ||
86 | + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} ) | ||
87 | + ENDIF (APPLE) | ||
88 | + ELSE () | ||
89 | + IF (APPLE) | ||
90 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib") | ||
91 | + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib ) | ||
92 | + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib ) | ||
93 | + ELSEIF (WIN32) | ||
94 | + # Link library | ||
95 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib") | ||
96 | + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib ) | ||
97 | + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib ) | ||
98 | + # Load library | ||
99 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dll") | ||
100 | + FIND_LIBRARY ( GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin | ||
101 | + NO_DEFAULT_PATH | ||
102 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
103 | + ) | ||
104 | + FIND_LIBRARY ( GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin | ||
105 | + NO_DEFAULT_PATH | ||
106 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
107 | + ) | ||
108 | + ELSE (APPLE) | ||
109 | + # Unices | ||
110 | + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib | ||
111 | + NO_DEFAULT_PATH | ||
112 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
113 | + ) | ||
114 | + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib | ||
115 | + NO_DEFAULT_PATH | ||
116 | + NO_SYSTEM_ENVIRONMENT_PATH | ||
117 | + ) | ||
118 | + ENDIF (APPLE) | ||
119 | + ENDIF () | ||
120 | + # MUST reset | ||
121 | + SET(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES}) | ||
122 | + | ||
123 | + SET( GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library") | ||
124 | + SET( GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library") | ||
125 | + | ||
126 | +ENDIF () |
1 | +#.rst: | ||
2 | +# FindGLUT | ||
3 | +# -------- | ||
4 | +# | ||
5 | +# try to find glut library and include files. | ||
6 | +# | ||
7 | +# IMPORTED Targets | ||
8 | +# ^^^^^^^^^^^^^^^^ | ||
9 | +# | ||
10 | +# This module defines the :prop_tgt:`IMPORTED` targets: | ||
11 | +# | ||
12 | +# ``GLUT::GLUT`` | ||
13 | +# Defined if the system has GLUT. | ||
14 | +# | ||
15 | +# Result Variables | ||
16 | +# ^^^^^^^^^^^^^^^^ | ||
17 | +# | ||
18 | +# This module sets the following variables: | ||
19 | +# | ||
20 | +# :: | ||
21 | +# | ||
22 | +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc. | ||
23 | +# GLUT_LIBRARIES, the libraries to link against | ||
24 | +# GLUT_FOUND, If false, do not try to use GLUT. | ||
25 | +# | ||
26 | +# Also defined, but not for general use are: | ||
27 | +# | ||
28 | +# :: | ||
29 | +# | ||
30 | +# GLUT_glut_LIBRARY = the full path to the glut library. | ||
31 | +# GLUT_Xmu_LIBRARY = the full path to the Xmu library. | ||
32 | +# GLUT_Xi_LIBRARY = the full path to the Xi Library. | ||
33 | + | ||
34 | +#============================================================================= | ||
35 | +# Copyright 2001-2009 Kitware, Inc. | ||
36 | +# | ||
37 | +# Distributed under the OSI-approved BSD License (the "License"); | ||
38 | +# see accompanying file Copyright.txt for details. | ||
39 | +# | ||
40 | +# This software is distributed WITHOUT ANY WARRANTY; without even the | ||
41 | +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
42 | +# See the License for more information. | ||
43 | +#============================================================================= | ||
44 | +# (To distribute this file outside of CMake, substitute the full | ||
45 | +# License text for the above reference.) | ||
46 | + | ||
47 | +if (WIN32) | ||
48 | + find_path( GLUT_INCLUDE_DIR NAMES GL/glut.h | ||
49 | + PATHS $ENV{GLUT_ROOT_PATH}/include ) | ||
50 | + | ||
51 | + if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) | ||
52 | + find_library( GLUT_glut_LIBRARY NAMES freeglut | ||
53 | + PATHS | ||
54 | + $ENV{GLUT_ROOT_PATH}/lib/x64 | ||
55 | + | ||
56 | + NO_DEFAULT_PATH | ||
57 | + ) | ||
58 | + else( CMAKE_SIZEOF_VOID_P EQUAL 8 ) | ||
59 | + find_library( GLUT_glut_LIBRARY NAMES glut glut32 freeglut | ||
60 | + PATHS | ||
61 | + ${OPENGL_LIBRARY_DIR} | ||
62 | + $ENV{GLUT_ROOT_PATH}/lib | ||
63 | + ) | ||
64 | + endif( CMAKE_SIZEOF_VOID_P EQUAL 8 ) | ||
65 | + | ||
66 | +else () | ||
67 | + | ||
68 | + if (APPLE) | ||
69 | + find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR}) | ||
70 | + find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX") | ||
71 | + find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX") | ||
72 | + | ||
73 | + if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa) | ||
74 | + add_library(GLUT::Cocoa UNKNOWN IMPORTED) | ||
75 | + # Cocoa should always be a Framework, but we check to make sure. | ||
76 | + if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$") | ||
77 | + set_target_properties(GLUT::Cocoa PROPERTIES | ||
78 | + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}") | ||
79 | + else() | ||
80 | + set_target_properties(GLUT::Cocoa PROPERTIES | ||
81 | + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}") | ||
82 | + endif() | ||
83 | + endif() | ||
84 | + else () | ||
85 | + | ||
86 | + if (BEOS) | ||
87 | + | ||
88 | + set(_GLUT_INC_DIR /boot/develop/headers/os/opengl) | ||
89 | + set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86) | ||
90 | + | ||
91 | + else() | ||
92 | + | ||
93 | + find_library( GLUT_Xi_LIBRARY Xi | ||
94 | + /usr/openwin/lib | ||
95 | + ) | ||
96 | + | ||
97 | + find_library( GLUT_Xmu_LIBRARY Xmu | ||
98 | + /usr/openwin/lib | ||
99 | + ) | ||
100 | + | ||
101 | + if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi) | ||
102 | + add_library(GLUT::Xi UNKNOWN IMPORTED) | ||
103 | + set_target_properties(GLUT::Xi PROPERTIES | ||
104 | + IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}") | ||
105 | + endif() | ||
106 | + | ||
107 | + if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu) | ||
108 | + add_library(GLUT::Xmu UNKNOWN IMPORTED) | ||
109 | + set_target_properties(GLUT::Xmu PROPERTIES | ||
110 | + IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}") | ||
111 | + endif() | ||
112 | + | ||
113 | + endif () | ||
114 | + | ||
115 | + find_path( GLUT_INCLUDE_DIR GL/glut.h | ||
116 | + /usr/include/GL | ||
117 | + /usr/openwin/share/include | ||
118 | + /usr/openwin/include | ||
119 | + /opt/graphics/OpenGL/include | ||
120 | + /opt/graphics/OpenGL/contrib/libglut | ||
121 | + ${_GLUT_INC_DIR} | ||
122 | + ) | ||
123 | + | ||
124 | + find_library( GLUT_glut_LIBRARY glut | ||
125 | + /usr/openwin/lib | ||
126 | + ${_GLUT_glut_LIB_DIR} | ||
127 | + ) | ||
128 | + | ||
129 | + unset(_GLUT_INC_DIR) | ||
130 | + unset(_GLUT_glut_LIB_DIR) | ||
131 | + | ||
132 | + endif () | ||
133 | + | ||
134 | +endif () | ||
135 | +include(FindPackageHandleStandardArgs) # defines find_package_handle_standard_args(), which the next line calls | ||
136 | +FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR) | ||
137 | + | ||
138 | +if (GLUT_FOUND) | ||
139 | + # Is -lXi and -lXmu required on all platforms that have it? | ||
140 | + # If not, we need some way to figure out what platform we are on. | ||
141 | + set( GLUT_LIBRARIES | ||
142 | + ${GLUT_glut_LIBRARY} | ||
143 | + ${GLUT_Xmu_LIBRARY} | ||
144 | + ${GLUT_Xi_LIBRARY} | ||
145 | + ${GLUT_cocoa_LIBRARY} | ||
146 | + ) | ||
147 | + | ||
148 | + if(NOT TARGET GLUT::GLUT) | ||
149 | + add_library(GLUT::GLUT UNKNOWN IMPORTED) | ||
150 | + set_target_properties(GLUT::GLUT PROPERTIES | ||
151 | + INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}") | ||
152 | + if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$") | ||
153 | + set_target_properties(GLUT::GLUT PROPERTIES | ||
154 | + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}") | ||
155 | + else() | ||
156 | + set_target_properties(GLUT::GLUT PROPERTIES | ||
157 | + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}") | ||
158 | + endif() | ||
159 | + | ||
160 | + if(TARGET GLUT::Xmu) | ||
161 | + set_property(TARGET GLUT::GLUT APPEND | ||
162 | + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu) | ||
163 | + endif() | ||
164 | + | ||
165 | + if(TARGET GLUT::Xi) | ||
166 | + set_property(TARGET GLUT::GLUT APPEND | ||
167 | + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi) | ||
168 | + endif() | ||
169 | + | ||
170 | + if(TARGET GLUT::Cocoa) | ||
171 | + set_property(TARGET GLUT::GLUT APPEND | ||
172 | + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa) | ||
173 | + endif() | ||
174 | + endif() | ||
175 | + | ||
176 | + #The following deprecated settings are for backwards compatibility with CMake1.4 | ||
177 | + set (GLUT_LIBRARY ${GLUT_LIBRARIES}) | ||
178 | + set (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR}) | ||
179 | +endif() | ||
180 | + | ||
181 | +mark_as_advanced( | ||
182 | + GLUT_INCLUDE_DIR | ||
183 | + GLUT_glut_LIBRARY | ||
184 | + GLUT_Xmu_LIBRARY | ||
185 | + GLUT_Xi_LIBRARY | ||
186 | + ) |
cmake/FindSTIM.cmake
1 | -include(FindPackageHandleStandardArgs) | ||
2 | - | ||
3 | -set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH}) | ||
4 | - | ||
5 | -find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) | ||
6 | - | ||
7 | -if(STIM_FOUND) | ||
8 | - set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR}) | ||
9 | -endif() | ||
10 | \ No newline at end of file | 1 | \ No newline at end of file |
2 | +# finds the STIM library (the automatic download is currently disabled; a hint is printed if it isn't present) | ||
3 | +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository) | ||
4 | + | ||
5 | +include(FindPackageHandleStandardArgs) | ||
6 | + | ||
7 | +set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH}) | ||
8 | + | ||
9 | +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) | ||
10 | + | ||
11 | +if(STIM_FOUND) | ||
12 | + set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR}) | ||
13 | +else() | ||
14 | + #the STIM library wasn't found: tell the user how to obtain it (the automatic download below is disabled) | ||
15 | + #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists | ||
16 | + #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git") | ||
17 | + #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) | ||
18 | + #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH) | ||
19 | + message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.") | ||
20 | + message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib") | ||
21 | +endif() | ||
22 | + | ||
23 | +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) |
1 | +function T = bsq2tensorflow(I, n) | ||
2 | + | ||
3 | + sx = size(I, 1); | ||
4 | + sy = size(I, 2) / n; %get the size of the tensor along Y | ||
5 | + sb = size(I, 3); | ||
6 | + | ||
7 | + T = zeros(sx * sy * sb, n); %allocate space for the output matrix | ||
8 | + for i = 0:n-1 | ||
9 | + ti = I(:, i * sy + 1 : i * sy + sy, :); | ||
10 | + T(:, i+1) = ti(:); | ||
11 | + end | ||
12 | +end | ||
13 | + | ||
14 | + | ||
0 | \ No newline at end of file | 15 | \ No newline at end of file |
matlab/enviLoadRaw.m
1 | %loads an ENVI file without any manipulation (changing orientation) | 1 | %loads an ENVI file without any manipulation (changing orientation) |
2 | +% enviLoadRaw(filename, headername) | ||
2 | function M = enviLoadRaw(filename, headername) | 3 | function M = enviLoadRaw(filename, headername) |
3 | 4 | ||
4 | %if a header isn't provided, assume it's just the filename | 5 | %if a header isn't provided, assume it's just the filename |
matlab/enviSaveRaw.m
1 | %saves an ENVI file without any manipulation, assumes (X, Y, S) | 1 | %saves an ENVI file without any manipulation, assumes (X, Y, S) |
2 | +% enviSaveRaw(M, filename, headername) | ||
2 | function enviSaveRaw(M, filename, headername) | 3 | function enviSaveRaw(M, filename, headername) |
3 | 4 | ||
4 | %if a header isn't provided, assume it's just the filename | 5 | %if a header isn't provided, assume it's just the filename |
1 | +% Read images of TIFF, SPE2.2(WinSpec) and SPE3.0(Lightfield) | ||
2 | +% Version: JTL Jun-9-2016 | ||
3 | +% ----------------- READ THIS FIRST !!!!! -------------------------------- | ||
4 | +% Change the file name to "readspe" before use | ||
5 | +% Example: | ||
6 | +% Z = readspe(filename) | ||
7 | +% Z = readspe(filename,'info') | ||
8 | +% Z = readspe(filename,frame_index) | ||
9 | +% Z = readspe(filename,frame_index,'info') | ||
10 | +% Input: | ||
11 | +% filename - filename string, e.g. 'image.spe' | ||
12 | +% frame_index - frame index, start from 1 | ||
13 | +% If you have multiple frames, use a "for" loop | ||
14 | +% 'info' - flag to show file info, i.e. dimension, number of frames, version | ||
15 | +% Output: | ||
16 | +% Z - image in the data type stored in the file, e.g. UINT16 (convert to double if you need) | ||
17 | +% ------------------------------------------------------------------------ | ||
18 | +% Z = readspe (filename,frame_index,'info') | ||
19 | +function Z = readspe (filename,varargin) | ||
20 | + | ||
21 | +if exist(filename) == 2 | ||
22 | + | ||
23 | + Nfr = 1; % default read first frame | ||
24 | + if nargin >1 | ||
25 | + if isa(varargin{1},'numeric') | ||
26 | + Nfr = varargin{1}; | ||
27 | + end | ||
28 | + end | ||
29 | + | ||
30 | + [~,~,ext] = fileparts(filename); % only the extension selects the reader | ||
31 | + switch upper(ext) | ||
32 | + case {'.TIF','.TIFF'} % accept both common TIFF extensions | ||
33 | + file_ver = 'TIFF'; | ||
34 | + Z = imread(filename); | ||
35 | + [Y,X] = size(Z); | ||
36 | + % datatype = class(Z) | ||
37 | + | ||
38 | + case '.SPE' | ||
39 | + fid = fopen(filename); | ||
40 | + I = fread(fid,Inf,'uint8'); | ||
41 | + fclose(fid); % the whole file is in memory; close now so an error while parsing cannot leak the handle | ||
42 | + X = double(typecast(uint8(I(43:44)),'uint16')); | ||
43 | + Y = double(typecast(uint8(I(657:658)),'uint16')); | ||
44 | + fr = typecast(uint8(I(1447:1450)),'int32'); | ||
45 | + spe_ver = typecast(uint8(I(1993:1996)),'single'); | ||
46 | + file_ver = ['SPE ' num2str(spe_ver)]; | ||
47 | + datatypeN = typecast(uint8(I(109:110)),'int16'); | ||
48 | + switch datatypeN | ||
49 | + case 0 % 32-bit float | ||
50 | + datatype = 'single'; datalength = 4; | ||
51 | + case 1 % 32-bit signed integer | ||
52 | + datatype = 'int32'; datalength = 4; | ||
53 | + case 2 % 16-bit signed integer | ||
54 | + datatype = 'int16'; datalength = 2; | ||
55 | + case 3 % 16-bit unsigned integer | ||
56 | + datatype = 'uint16'; datalength = 2; | ||
57 | + case 8 % 32-bit unsigned integer | ||
58 | + datatype = 'uint32'; datalength = 4; | ||
59 | + end | ||
60 | + % A = I(4101:4100+X*Y*2); % Default read first frame | ||
61 | + A = I(4101+X*Y*datalength*(Nfr-1):4100+X*Y*datalength*Nfr); | ||
62 | + B = typecast(uint8(A),datatype); % important | ||
63 | + Z = reshape(B,X,Y); | ||
64 | + Z = Z'; | ||
65 | + end | ||
66 | + | ||
67 | + if nargin >1 | ||
68 | + if ischar(varargin{end}) && strcmp(varargin{end},'info') % strcmp: == on char arrays is element-wise and errors on length mismatch | ||
69 | + display(['X = ' num2str(X)]); | ||
70 | + display(['Y = ' num2str(Y)]); | ||
71 | + if(exist('fr','var'));display(['Number of Frames: ' num2str(fr)]);end; | ||
72 | + display(['File version: ' file_ver]); | ||
73 | + end | ||
74 | + end | ||
75 | + | ||
76 | +elseif exist(filename) == 0 | ||
77 | + display('File does not exist!'); | ||
78 | +end | ||
0 | \ No newline at end of file | 79 | \ No newline at end of file |
1 | +function spe2envi(filemask, outfile) | ||
2 | + %combines the SPE files matching filemask (one file per image line, ordered by file date) into a single ENVI file | ||
3 | + filelist = dir(filemask); | ||
4 | + folder = fileparts(filemask); %dir() returns bare file names, so keep the directory part of the mask | ||
5 | + %get a list of date numbers | ||
6 | + datenums = cell2mat({filelist.datenum}); | ||
7 | + | ||
8 | + %sort the file order based on acquisition time | ||
9 | + [~, id] = sort(datenums); | ||
10 | + | ||
11 | + %get the number of files | ||
12 | + Y = length(id); %size of the image along Y | ||
13 | + if Y == 0, error('spe2envi:noFiles', 'No files match: %s', filemask); end | ||
14 | + %load the first file to determine the spectral and X-axis size | ||
15 | + temp = readspe(fullfile(folder, filelist(1).name)); | ||
16 | + X = size(temp, 1); %size of the image along X | ||
17 | + B = size(temp, 2); %number of bands in the image | ||
18 | + | ||
19 | + %create the cube | ||
20 | + I = zeros(X, Y, B); | ||
21 | + | ||
22 | + %for each line | ||
23 | + for y = 1:Y | ||
24 | + | ||
25 | + %read a SPE file | ||
26 | + img = readspe(fullfile(folder, filelist(id(y)).name)); | ||
27 | + | ||
28 | + I(:, y, :) = permute(img, [1 3 2]); | ||
29 | + end | ||
30 | + | ||
31 | + enviSaveRaw(single(I), outfile, [outfile '.hdr']); | ||
32 | + | ||
33 | + | ||
34 | + |
matlab/brewermap.m renamed to matlab/stimBrewerMap.m
1 | +%Loads a standard Agilent ResPro binary file | ||
2 | +% stimLoadAgilent(filename) | ||
3 | +function S = stimLoadAgilent(filename) | ||
4 | + fid = fopen(filename); | ||
5 | + if fid == -1, error('stimLoadAgilent:fileOpen', 'Unable to open file: %s', filename); end | ||
6 | + fseek(fid, 9, 'bof'); | ||
7 | + Z = fread(fid, 1, 'uint16'); | ||
8 | + fseek(fid, 13, 'cof'); | ||
9 | + X = fread(fid, 1, 'uint16'); | ||
10 | + Y = fread(fid, 1, 'uint16'); | ||
11 | + | ||
12 | + fseek(fid, 1020, 'bof'); | ||
13 | + | ||
14 | + S = reshape(fread(fid, [X, Y * Z], 'float32'), [X, Y, Z]); | ||
15 | + fclose(fid); %release the file handle once the data has been read | ||
16 | + | ||
0 | \ No newline at end of file | 17 | \ No newline at end of file |
1 | +function [TPR, FPR, AUC] = stimROC(C, T) | ||
2 | +%build an ROC curve: returns the true/false positive rates after each sorted sample (TPR, FPR) and the area under the curve (AUC) | ||
3 | +% C - class labels as an array of binary values (1 = true positive) | ||
4 | +% T - threshold used for classification | ||
5 | + | ||
6 | + %sort the thresholds in descending order and get the indices | ||
7 | + [~, I] = sort(T, 'descend'); | ||
8 | + | ||
9 | + %sort the class labels in the same order as the thresholds | ||
10 | + Cs = C(I); | ||
11 | + | ||
12 | + %calculate the number of measurements | ||
13 | + M = numel(C); %numel counts the labels for both row and column vectors | ||
14 | + | ||
15 | + %calculate the number of positives | ||
16 | + P = nnz(C); | ||
17 | + | ||
18 | + %calculate the number of negatives | ||
19 | + N = M - P; | ||
20 | + | ||
21 | + %both rates divide by a class count (P and N), so both classes must be present | ||
22 | + if P == M | ||
23 | + error('ERROR: no negative observations'); | ||
24 | + end | ||
25 | + if P == 0 | ||
26 | + error('ERROR: no positive observations'); | ||
27 | + end | ||
28 | + | ||
29 | + %allocate space for the ROC curve | ||
30 | + TPR = zeros(1, M); | ||
31 | + FPR = zeros(1, M); | ||
32 | + | ||
33 | + | ||
34 | + | ||
35 | + %calculate the number of inflection points | ||
36 | + ip = 0; | ||
37 | + for i = 2:M | ||
38 | + if Cs(i) ~= Cs(i-1) | ||
39 | + ip = ip + 1; | ||
40 | + end | ||
41 | + end | ||
42 | + | ||
43 | + %initialize the true and false positive counts to zero | ||
44 | + TP = 0; | ||
45 | + FP = 0; | ||
46 | + for i = 1:M | ||
47 | + if Cs(i) == 1 | ||
48 | + TP = TP + 1; | ||
49 | + else | ||
50 | + FP = FP + 1; | ||
51 | + end | ||
52 | + | ||
53 | + TPR(i) = TP / P; | ||
54 | + FPR(i) = FP / N; | ||
55 | + end | ||
56 | + | ||
57 | + %calculate the area under the ROC curve (sum of rectangles: FPR step width times current TPR) | ||
58 | + AUC = 0; | ||
59 | + for i = 2:M | ||
60 | + w = FPR(i) - FPR(i-1); | ||
61 | + h = TPR(i); | ||
62 | + AUC = AUC + w * h; | ||
63 | + end | ||
64 | + | ||
65 | + | ||
66 | + | ||
67 | + | ||
68 | + | ||
69 | + | ||
0 | \ No newline at end of file | 70 | \ No newline at end of file |
1 | +#!/usr/bin/python3 | ||
2 | + | ||
3 | +#import system processes | ||
4 | +import subprocess, sys | ||
5 | + | ||
6 | +if len(sys.argv) > 1: | ||
7 | + infile = int(sys.argv[1]) | ||
8 | + | ||
9 | +basefile = infile + "-base" | ||
10 | +normfile = infile + "-norm" | ||
11 | + | ||
12 | +runcommand = "hsiproc " + infile + basefile + " --baseline baseline.txt" | ||
13 | +subprocess.call(runcommand, shell=True) | ||
0 | \ No newline at end of file | 14 | \ No newline at end of file |
stim/biomodels/cellset.h
@@ -117,7 +117,7 @@ public: | @@ -117,7 +117,7 @@ public: | ||
117 | } | 117 | } |
118 | 118 | ||
119 | /// Return the maximum value of a field in this cell set | 119 | /// Return the maximum value of a field in this cell set |
120 | - double max(std::string field){ | 120 | + double maximum(std::string field){ |
121 | size_t idx = fields[field]; //get the field index | 121 | size_t idx = fields[field]; //get the field index |
122 | size_t ncells = cells.size(); //get the total number of cells | 122 | size_t ncells = cells.size(); //get the total number of cells |
123 | double maxval, val; //stores the current and maximum values | 123 | double maxval, val; //stores the current and maximum values |
@@ -130,7 +130,7 @@ public: | @@ -130,7 +130,7 @@ public: | ||
130 | } | 130 | } |
131 | 131 | ||
132 | /// Return the minimum value of a field in this cell set | 132 | /// Return the minimum value of a field in this cell set |
133 | - double min(std::string field){ | 133 | + double minimum(std::string field){ |
134 | size_t idx = fields[field]; //get the field index | 134 | size_t idx = fields[field]; //get the field index |
135 | size_t ncells = cells.size(); //get the total number of cells | 135 | size_t ncells = cells.size(); //get the total number of cells |
136 | double minval, val; //stores the current and maximum values | 136 | double minval, val; //stores the current and maximum values |
stim/biomodels/network.h
@@ -11,8 +11,8 @@ | @@ -11,8 +11,8 @@ | ||
11 | #include <stim/math/vec3.h> | 11 | #include <stim/math/vec3.h> |
12 | #include <stim/visualization/obj.h> | 12 | #include <stim/visualization/obj.h> |
13 | #include <stim/visualization/cylinder.h> | 13 | #include <stim/visualization/cylinder.h> |
14 | -#include <ANN/ANN.h> | ||
15 | -#include <boost/tuple/tuple.hpp> | 14 | +#include <stim/structures/kdtree.cuh> |
15 | +#include <stim/cuda/cudatools/timer.h> | ||
16 | 16 | ||
17 | 17 | ||
18 | namespace stim{ | 18 | namespace stim{ |
@@ -35,7 +35,7 @@ class network{ | @@ -35,7 +35,7 @@ class network{ | ||
35 | // default constructor | 35 | // default constructor |
36 | edge() : cylinder<T>() | 36 | edge() : cylinder<T>() |
37 | { | 37 | { |
38 | - v[1] = -1; v[0] = -1; | 38 | + v[1] = (unsigned)(-1); v[0] = (unsigned)(-1); |
39 | } | 39 | } |
40 | /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor | 40 | /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor |
41 | 41 | ||
@@ -57,7 +57,7 @@ class network{ | @@ -57,7 +57,7 @@ class network{ | ||
57 | /// Output the edge information as a string | 57 | /// Output the edge information as a string |
58 | std::string str(){ | 58 | std::string str(){ |
59 | std::stringstream ss; | 59 | std::stringstream ss; |
60 | - ss<<"("<<cylinder<T>::size()<<")\tl = "<<this.length()<<"\t"<<v[0]<<"----"<<v[1]; | 60 | + ss<<"("<<cylinder<T>::size()<<")\tl = "<<this->length()<<"\t"<<v[0]<<"----"<<v[1]; |
61 | return ss.str(); | 61 | return ss.str(); |
62 | } | 62 | } |
63 | 63 | ||
@@ -125,7 +125,9 @@ public: | @@ -125,7 +125,9 @@ public: | ||
125 | return V.size(); | 125 | return V.size(); |
126 | } | 126 | } |
127 | 127 | ||
128 | - std::vector<vertex> operator*(T s){ | 128 | + //scale the network by some constant value |
129 | + // I don't think these work?????? | ||
130 | + /*std::vector<vertex> operator*(T s){ | ||
129 | for (unsigned i=0; i< vertices; i ++ ){ | 131 | for (unsigned i=0; i< vertices; i ++ ){ |
130 | V[i] = V[i] * s; | 132 | V[i] = V[i] * s; |
131 | } | 133 | } |
@@ -139,10 +141,9 @@ public: | @@ -139,10 +141,9 @@ public: | ||
139 | } | 141 | } |
140 | } | 142 | } |
141 | return V; | 143 | return V; |
142 | - } | 144 | + }*/ |
143 | 145 | ||
144 | // Returns an average of branching index in the network | 146 | // Returns an average of branching index in the network |
145 | - | ||
146 | double BranchingIndex(){ | 147 | double BranchingIndex(){ |
147 | double B=0; | 148 | double B=0; |
148 | for(unsigned v=0; v < V.size(); v ++){ | 149 | for(unsigned v=0; v < V.size(); v ++){ |
@@ -154,7 +155,6 @@ public: | @@ -154,7 +155,6 @@ public: | ||
154 | } | 155 | } |
155 | 156 | ||
156 | // Returns number of branch points in the network | 157 | // Returns number of branch points in the network |
157 | - | ||
158 | unsigned int BranchP(){ | 158 | unsigned int BranchP(){ |
159 | unsigned int B=0; | 159 | unsigned int B=0; |
160 | unsigned int c; | 160 | unsigned int c; |
@@ -168,7 +168,6 @@ public: | @@ -168,7 +168,6 @@ public: | ||
168 | } | 168 | } |
169 | 169 | ||
170 | // Returns number of end points (tips) in the network | 170 | // Returns number of end points (tips) in the network |
171 | - | ||
172 | unsigned int EndP(){ | 171 | unsigned int EndP(){ |
173 | unsigned int B=0; | 172 | unsigned int B=0; |
174 | unsigned int c; | 173 | unsigned int c; |
@@ -202,10 +201,11 @@ public: | @@ -202,10 +201,11 @@ public: | ||
202 | // return s; | 201 | // return s; |
203 | //} | 202 | //} |
204 | 203 | ||
205 | - | 204 | + //Calculate Metrics--------------------------------------------------- |
206 | // Returns an average of fiber/edge lengths in the network | 205 | // Returns an average of fiber/edge lengths in the network |
207 | double Lengths(){ | 206 | double Lengths(){ |
208 | - stim::vec<T> L;double sumLength = 0; | 207 | + stim::vec<T> L; |
208 | + double sumLength = 0; | ||
209 | for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network | 209 | for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network |
210 | L.push_back(E[e].length()); //append the edge length | 210 | L.push_back(E[e].length()); //append the edge length |
211 | sumLength = sumLength + E[e].length(); | 211 | sumLength = sumLength + E[e].length(); |
@@ -269,8 +269,10 @@ public: | @@ -269,8 +269,10 @@ public: | ||
269 | double avg = sumFractDim / E.size(); | 269 | double avg = sumFractDim / E.size(); |
270 | return avg; | 270 | return avg; |
271 | } | 271 | } |
272 | - stim::cylinder<T> get_cylinder(unsigned f){ | ||
273 | - return E[f]; //return the specified edge (casting it to a fiber) | 272 | + |
273 | + //returns a cylinder representing a given fiber (based on edge index) | ||
274 | + stim::cylinder<T> get_cylinder(unsigned e){ | ||
275 | + return E[e]; //return the specified edge (casting it to a fiber) | ||
274 | } | 276 | } |
275 | 277 | ||
276 | //load a network from an OBJ file | 278 | //load a network from an OBJ file |
@@ -385,11 +387,27 @@ public: | @@ -385,11 +387,27 @@ public: | ||
385 | return n; | 387 | return n; |
386 | } | 388 | } |
387 | 389 | ||
390 | + //Copy the point cloud representing the centerline for the network into an array | ||
391 | + void centerline_cloud(T* dst) { | ||
392 | + size_t p; //stores the current edge point | ||
393 | + size_t P; //stores the number of points in an edge | ||
394 | + size_t i = 0; //index into the output array of points | ||
395 | + for (size_t e = 0; e < E.size(); e++) { //for each edge in the network | ||
396 | + P = E[e].size(); //get the number of points in this edge | ||
397 | + for (p = 0; p < P; p++) { | ||
398 | + dst[i * 3 + 0] = E[e][p][0]; | ||
399 | + dst[i * 3 + 1] = E[e][p][1]; | ||
400 | + dst[i * 3 + 2] = E[e][p][2]; | ||
401 | + i++; | ||
402 | + } | ||
403 | + } | ||
404 | + } | ||
405 | + | ||
388 | // gaussian function | 406 | // gaussian function |
389 | float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25 | 407 | float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25 |
390 | 408 | ||
391 | - // stim 3d vector to annpoint of 3 dimensions | ||
392 | - void stim2ann(ANNpoint &a, stim::vec3<T> b){ | 409 | + // convert vec3 to array |
410 | + void stim2array(float *a, stim::vec3<T> b){ | ||
393 | a[0] = b[0]; | 411 | a[0] = b[0]; |
394 | a[1] = b[1]; | 412 | a[1] = b[1]; |
395 | a[2] = b[2]; | 413 | a[2] = b[2]; |
@@ -413,57 +431,81 @@ public: | @@ -413,57 +431,81 @@ public: | ||
413 | 431 | ||
414 | /// @param A is the network to compare to - the field is generated for A | 432 | /// @param A is the network to compare to - the field is generated for A |
415 | /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison | 433 | /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison |
416 | - stim::network<T> compare(stim::network<T> A, float sigma){ | 434 | + stim::network<T> compare(stim::network<T> A, float sigma, int device){ |
417 | 435 | ||
418 | - stim::network<T> R; //generate a network storing the result of the comparison | ||
419 | - R = (*this); //initialize the result with the current network | 436 | + stim::network<T> R; //generate a network storing the result of the comparison |
437 | + R = (*this); //initialize the result with the current network | ||
420 | 438 | ||
421 | - //generate a KD-tree for network A | ||
422 | - float metric = 0.0; // initialize metric to be returned after comparing the networks | ||
423 | - ANNkd_tree* kdt; // initialize a pointer to a kd tree | ||
424 | - double **c; // centerline (array of double pointers) - points on kdtree must be double | ||
425 | - unsigned int n_data = A.total_points(); // set the number of points | ||
426 | - c = (double**) malloc(sizeof(double*) * n_data); // allocate the array pointer | ||
427 | - for(unsigned int i = 0; i < n_data; i++) // allocate space for each point of 3 dimensions | ||
428 | - c[i] = (double*) malloc(sizeof(double) * 3); | 439 | + T *c; // centerline (array of double pointers) - points on kdtree must be double |
440 | + size_t n_data = A.total_points(); // set the number of points | ||
441 | + c = (T*) malloc(sizeof(T) * n_data * 3); //allocate an array to store all points in the data set | ||
429 | 442 | ||
430 | unsigned t = 0; | 443 | unsigned t = 0; |
431 | - for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network | ||
432 | - for(unsigned p = 0; p < A.E[e].size(); p++){ //for each point in the edge | 444 | + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network |
445 | + for(unsigned p = 0; p < A.E[e].size(); p++){ //for each point in the edge | ||
433 | for(unsigned d = 0; d < 3; d++){ //for each coordinate | 446 | for(unsigned d = 0; d < 3; d++){ //for each coordinate |
434 | 447 | ||
435 | - c[t][d] = A.E[e][p][d]; | 448 | + c[t * 3 + d] = A.E[e][p][d]; //copy the point into the array c |
436 | } | 449 | } |
437 | t++; | 450 | t++; |
438 | } | 451 | } |
439 | } | 452 | } |
440 | 453 | ||
454 | + //generate a KD-tree for network A | ||
455 | + //float metric = 0.0; // initialize metric to be returned after comparing the network | ||
456 | + size_t MaxTreeLevels = 3; // max tree level | ||
457 | + | ||
458 | +#ifdef __CUDACC__ | ||
459 | + cudaSetDevice(device); | ||
460 | + stim::cuda_kdtree<T, 3> kdt; // initialize a pointer to a kd tree | ||
461 | + | ||
441 | //compare each point in the current network to the field produced by A | 462 | //compare each point in the current network to the field produced by A |
442 | - ANNpointArray pts = (ANNpointArray)c; // create an array of data points of type double | ||
443 | - kdt = new ANNkd_tree(pts, n_data, 3); // build a KD tree using the annpointarray | ||
444 | - double eps = 0; // error bound | ||
445 | - ANNdistArray dists = new ANNdist[1]; // near neighbor distances | ||
446 | - ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices | 463 | + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree |
464 | + T *dists = new T[1]; // near neighbor distances | ||
465 | + size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices | ||
447 | 466 | ||
448 | stim::vec3<T> p0, p1; | 467 | stim::vec3<T> p0, p1; |
449 | - float m1; | ||
450 | - float M = 0; //stores the total metric value | ||
451 | - float L = 0; //stores the total network length | ||
452 | - ANNpoint queryPt = annAllocPt(3); | 468 | + T m1; |
469 | + //float M = 0; //stores the total metric value | ||
470 | + //float L = 0; //stores the total network length | ||
471 | + T* queryPt = new T[3]; | ||
453 | for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A | 472 | for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A |
454 | R.E[e].add_mag(0); //add a new magnitude for the metric | 473 | R.E[e].add_mag(0); //add a new magnitude for the metric |
455 | 474 | ||
456 | for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge | 475 | for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge |
457 | 476 | ||
458 | p1 = R.E[e][p]; //get the next point in the edge | 477 | p1 = R.E[e][p]; //get the next point in the edge |
459 | - stim2ann(queryPt, p1); | ||
460 | - kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network | ||
461 | - m1 = 1.0f - gaussianFunction((float)dists[0], sigma); //calculate the metric value based on the distance | 478 | + stim2array(queryPt, p1); |
479 | + kdt.search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network | ||
480 | + | ||
481 | + m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance | ||
462 | R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment | 482 | R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment |
463 | 483 | ||
464 | } | 484 | } |
465 | } | 485 | } |
486 | +#else | ||
487 | + stim::cpu_kdtree<T, 3> kdt; | ||
488 | + kdt.create(c, n_data, MaxTreeLevels); | ||
489 | + T *dists = new T[1]; // near neighbor distances | ||
490 | + size_t *nnIdx = new size_t[1]; // near neighbor indices // allocate near neigh indices | ||
491 | + | ||
492 | + stim::vec3<T> p0, p1; | ||
493 | + T m1; | ||
494 | + T* queryPt = new T[3]; | ||
495 | + for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in A | ||
496 | + R.E[e].add_mag(0); //add a new magnitude for the metric | ||
497 | + | ||
498 | + for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge | ||
466 | 499 | ||
500 | + p1 = R.E[e][p]; //get the next point in the edge | ||
501 | + stim2array(queryPt, p1); | ||
502 | + kdt.cpu_search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network | ||
503 | + | ||
504 | + m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance | ||
505 | + R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment | ||
506 | + } | ||
507 | + } | ||
508 | +#endif | ||
467 | return R; //return the resulting network | 509 | return R; //return the resulting network |
468 | } | 510 | } |
469 | 511 | ||
@@ -487,7 +529,7 @@ public: | @@ -487,7 +529,7 @@ public: | ||
487 | void load_txt(std::string filename) | 529 | void load_txt(std::string filename) |
488 | { | 530 | { |
489 | std::vector <std::string> file_contents; | 531 | std::vector <std::string> file_contents; |
490 | - std::ifstream file(filename); | 532 | + std::ifstream file(filename.c_str()); |
491 | std::string line; | 533 | std::string line; |
492 | std::vector<unsigned> id2vert; //this list stores the vertex ID associated with each network vertex | 534 | std::vector<unsigned> id2vert; //this list stores the vertex ID associated with each network vertex |
493 | //for each line in the text file, store them as strings in file_contents | 535 | //for each line in the text file, store them as strings in file_contents |
@@ -538,7 +580,7 @@ public: | @@ -538,7 +580,7 @@ public: | ||
538 | for(unsigned int d = 0; d < 3; d++){ | 580 | for(unsigned int d = 0; d < 3; d++){ |
539 | ss<<p[i][d]; | 581 | ss<<p[i][d]; |
540 | } | 582 | } |
541 | - ss < "\n"; | 583 | + ss << "\n"; |
542 | } | 584 | } |
543 | return ss.str(); | 585 | return ss.str(); |
544 | } | 586 | } |
@@ -552,8 +594,8 @@ public: | @@ -552,8 +594,8 @@ public: | ||
552 | void | 594 | void |
553 | to_txt(std::string filename) | 595 | to_txt(std::string filename) |
554 | { | 596 | { |
555 | - std::ofstream ofs(filename, std::ofstream::out | std::ofstream::app); | ||
556 | - int num; | 597 | + std::ofstream ofs(filename.c_str(), std::ofstream::out | std::ofstream::app); |
598 | + //int num; | ||
557 | ofs << (E.size()).str() << "\n"; | 599 | ofs << (E.size()).str() << "\n"; |
558 | for(unsigned int i = 0; i < E.size(); i++) | 600 | for(unsigned int i = 0; i < E.size(); i++) |
559 | { | 601 | { |
@@ -566,7 +608,8 @@ public: | @@ -566,7 +608,8 @@ public: | ||
566 | { | 608 | { |
567 | std::string str; | 609 | std::string str; |
568 | str = V[i].str(); | 610 | str = V[i].str(); |
569 | - removeCharsFromString(str, "[],"); | 611 | + char temp[4] = "[],"; |
612 | + removeCharsFromString(str, temp); | ||
570 | ofs << str << "\n"; | 613 | ofs << str << "\n"; |
571 | } | 614 | } |
572 | ofs.close(); | 615 | ofs.close(); |
stim/biomodels/network_dep.h
@@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
4 | #include <stim/math/vector.h> | 4 | #include <stim/math/vector.h> |
5 | #include <stim/visualization/obj.h> | 5 | #include <stim/visualization/obj.h> |
6 | #include <list> | 6 | #include <list> |
7 | -#include <ANN/ANN.h> | 7 | +//#include <ANN/ANN.h> |
8 | 8 | ||
9 | namespace stim{ | 9 | namespace stim{ |
10 | 10 |
stim/cuda/cudatools/error.h
1 | +#ifndef STIM_CUDA_ERROR_H | ||
2 | +#define STIM_CUDA_ERROR_H | ||
3 | + | ||
1 | #include <stdio.h> | 4 | #include <stdio.h> |
2 | #include <iostream> | 5 | #include <iostream> |
3 | using namespace std; | 6 | using namespace std; |
4 | #include "cuda_runtime.h" | 7 | #include "cuda_runtime.h" |
5 | #include "device_launch_parameters.h" | 8 | #include "device_launch_parameters.h" |
6 | #include "cufft.h" | 9 | #include "cufft.h" |
7 | - | ||
8 | -#ifndef CUDA_HANDLE_ERROR_H | ||
9 | -#define CUDA_HANDLE_ERROR_H | 10 | +#include "cublas_v2.h" |
10 | 11 | ||
11 | //handle error macro | 12 | //handle error macro |
12 | -static void HandleError( cudaError_t err, const char *file, int line ) { | 13 | +static void cuHandleError( cudaError_t err, const char *file, int line ) { |
13 | if (err != cudaSuccess) { | 14 | if (err != cudaSuccess) { |
14 | - //FILE* outfile = fopen("cudaErrorLog.txt", "w"); | ||
15 | - //fprintf(outfile, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); | ||
16 | - //fclose(outfile); | ||
17 | printf("%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); | 15 | printf("%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); |
18 | - //exit( EXIT_FAILURE ); | ||
19 | 16 | ||
20 | } | 17 | } |
21 | } | 18 | } |
22 | -#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) | 19 | +#define HANDLE_ERROR( err ) (cuHandleError( err, __FILE__, __LINE__ )) |
23 | 20 | ||
24 | -static void CufftError( cufftResult err ) | 21 | +static void cufftHandleError( cufftResult err, const char*file, int line ) |
25 | { | 22 | { |
26 | if (err != CUFFT_SUCCESS) | 23 | if (err != CUFFT_SUCCESS) |
27 | { | 24 | { |
@@ -42,7 +39,29 @@ static void CufftError( cufftResult err ) | @@ -42,7 +39,29 @@ static void CufftError( cufftResult err ) | ||
42 | 39 | ||
43 | } | 40 | } |
44 | } | 41 | } |
42 | +#define CUFFT_HANDLE_ERROR( err ) (cufftHandleError( err, __FILE__, __LINE__ )) | ||
45 | 43 | ||
44 | +static void cublasHandleError( cublasStatus_t err, const char*file, int line ){ | ||
45 | + if(err != CUBLAS_STATUS_SUCCESS){ | ||
46 | + if(err == CUBLAS_STATUS_NOT_INITIALIZED) | ||
47 | + std::cout<<"CUBLAS_STATUS_NOT_INITIALIZED" <<" in file "<<file<<" line "<<std::endl; | ||
48 | + else if(err == CUBLAS_STATUS_ALLOC_FAILED) | ||
49 | + std::cout<<"CUBLAS_STATUS_ALLOC_FAILED" <<" in file "<<file<<" line "<<std::endl; | ||
50 | + else if(err == CUBLAS_STATUS_INVALID_VALUE) | ||
51 | + std::cout<<"CUBLAS_STATUS_INVALID_VALUE" <<" in file "<<file<<" line "<<std::endl; | ||
52 | + else if(err == CUBLAS_STATUS_ARCH_MISMATCH) | ||
53 | + std::cout<<"CUBLAS_STATUS_ARCH_MISMATCH" <<" in file "<<file<<" line "<<std::endl; | ||
54 | + else if(err == CUBLAS_STATUS_MAPPING_ERROR) | ||
55 | + std::cout<<"CUBLAS_STATUS_MAPPING_ERROR" <<" in file "<<file<<" line "<<std::endl; | ||
56 | + else if(err == CUBLAS_STATUS_EXECUTION_FAILED) | ||
57 | + std::cout<<"CUBLAS_STATUS_EXECUTION_FAILED" <<" in file "<<file<<" line "<<std::endl; | ||
58 | + else if(err == CUBLAS_STATUS_INTERNAL_ERROR) | ||
59 | + std::cout<<"CUBLAS_STATUS_INTERNAL_ERROR" <<" in file "<<file<<" line "<<std::endl; | ||
60 | + else | ||
61 | + std::cout<<"Unknown error"<<" in file "<<file<<" line "<<std::endl; | ||
62 | + } | ||
63 | +} | ||
64 | +#define CUBLAS_HANDLE_ERROR( err ) (cublasHandleError( err, __FILE__, __LINE__ )) | ||
46 | 65 | ||
47 | 66 | ||
48 | #endif | 67 | #endif |
stim/envi/agilent_binary.h
@@ -4,13 +4,15 @@ | @@ -4,13 +4,15 @@ | ||
4 | 4 | ||
5 | #include <string> | 5 | #include <string> |
6 | #include <fstream> | 6 | #include <fstream> |
7 | +#include <complex> | ||
7 | 8 | ||
8 | //CUDA | 9 | //CUDA |
9 | -#ifdef CUDA_FOUND | ||
10 | - #include <cuda_runtime.h> | ||
11 | - #include "cufft.h" | ||
12 | - #include <stim/cuda/cudatools/error.h> | ||
13 | -#endif | 10 | +//#ifdef CUDA_FOUND |
11 | +#include <cuda_runtime.h> | ||
12 | +#include "cufft.h" | ||
13 | +#include <stim/cuda/cudatools/error.h> | ||
14 | +#include <stim/envi/envi_header.h> | ||
15 | +//#endif | ||
14 | 16 | ||
15 | namespace stim{ | 17 | namespace stim{ |
16 | 18 | ||
@@ -19,10 +21,10 @@ class agilent_binary{ | @@ -19,10 +21,10 @@ class agilent_binary{ | ||
19 | 21 | ||
20 | protected: | 22 | protected: |
21 | std::string fname; | 23 | std::string fname; |
22 | - T* ptr; | ||
23 | - size_t R[3]; | ||
24 | - static const size_t header = 1020; | ||
25 | - double Z[2]; | 24 | + T* ptr; //pointer to the image data |
25 | + size_t R[3]; //size of the binary image in X, Y, and Z | ||
26 | + static const size_t header = 1020; //header size | ||
27 | + double Z[2]; //range of z values (position or wavelength) | ||
26 | 28 | ||
27 | public: | 29 | public: |
28 | size_t size(){ | 30 | size_t size(){ |
@@ -42,6 +44,10 @@ public: | @@ -42,6 +44,10 @@ public: | ||
42 | alloc(); | 44 | alloc(); |
43 | } | 45 | } |
44 | 46 | ||
47 | + size_t dim(size_t i){ | ||
48 | + return R[i]; | ||
49 | + } | ||
50 | + | ||
45 | /// Create a deep copy of an agileng_binary object | 51 | /// Create a deep copy of an agileng_binary object |
46 | void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){ | 52 | void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){ |
47 | dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory | 53 | dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory |
@@ -136,6 +142,42 @@ public: | @@ -136,6 +142,42 @@ public: | ||
136 | return header; | 142 | return header; |
137 | } | 143 | } |
138 | 144 | ||
145 | + /// Subtract the mean from each pixel. Generally used for centering an interferogram. | ||
146 | + void meancenter(){ | ||
147 | + size_t Z = R[2]; //store the number of bands | ||
148 | + size_t XY = R[0] * R[1]; //store the number of pixels in the image | ||
149 | + T sum = (T)0; | ||
150 | + T mean; | ||
151 | + for(size_t xy = 0; xy < XY; xy++){ //for each pixel | ||
152 | + sum = 0; | ||
153 | + for(size_t z = 0; z < Z; z++){ //for each band | ||
154 | + sum += ptr[ z * XY + xy ]; //add the band value to a running sum | ||
155 | + } | ||
156 | + mean = sum / (T)Z; //calculate the pixel mean | ||
157 | + for(size_t z = 0; z < Z; z++){ | ||
158 | + ptr[ z * XY + xy ] -= mean; //subtract the mean from each band | ||
159 | + } | ||
160 | + } | ||
161 | + } | ||
162 | + | ||
163 | + /// adds n bands of zero padding to the end of the file | ||
164 | + void zeropad(size_t n){ | ||
165 | + size_t newZ = R[2] + n; | ||
166 | + T* temp = (T*) calloc(R[0] * R[1] * newZ, sizeof(T)); //allocate space for the new image | ||
167 | + memcpy(temp, ptr, size() * sizeof(T)); //copy the old data to the new image | ||
168 | + | ||
169 | + free(ptr); //free the old data | ||
170 | + ptr = temp; //swap in the new data | ||
171 | + R[2] = newZ; //set the z-dimension to the new zero value | ||
172 | + } | ||
173 | + | ||
174 | + //pads to the nearest power-of-two | ||
175 | + void zeropad(){ | ||
176 | + size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two | ||
177 | + size_t n = newZ - R[2]; //calculate the number of bands to add | ||
178 | + zeropad(n); //add the padding | ||
179 | + } | ||
180 | + | ||
139 | /// Calculate the absorbance spectrum from the transmission spectrum given a background | 181 | /// Calculate the absorbance spectrum from the transmission spectrum given a background |
140 | void absorbance(stim::agilent_binary<T>* background){ | 182 | void absorbance(stim::agilent_binary<T>* background){ |
141 | size_t N = size(); //calculate the number of values to be ratioed | 183 | size_t N = size(); //calculate the number of values to be ratioed |
@@ -147,7 +189,7 @@ public: | @@ -147,7 +189,7 @@ public: | ||
147 | ptr[i] = -log10(ptr[i] / background->ptr[i]); | 189 | ptr[i] = -log10(ptr[i] / background->ptr[i]); |
148 | } | 190 | } |
149 | 191 | ||
150 | -#ifdef CUDA_FOUND | 192 | +//#ifdef CUDA_FOUND |
151 | /// Perform an FFT and return a binary file with bands in the specified range | 193 | /// Perform an FFT and return a binary file with bands in the specified range |
152 | agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){ | 194 | agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){ |
153 | auto total_start = std::chrono::high_resolution_clock::now(); | 195 | auto total_start = std::chrono::high_resolution_clock::now(); |
@@ -234,7 +276,22 @@ public: | @@ -234,7 +276,22 @@ public: | ||
234 | 276 | ||
235 | return result; | 277 | return result; |
236 | } | 278 | } |
237 | -#endif | 279 | + |
280 | + //saves the binary as an ENVI file with a BIP interleave format | ||
281 | + int bip(T* bip_ptr){ | ||
282 | + //std::ofstream out(outfile.c_str(), std::ios::binary); //create a binary file stream for output | ||
283 | + size_t XY = R[0] * R[1]; | ||
284 | + size_t B = R[2]; | ||
285 | + size_t b; | ||
286 | + | ||
287 | + for(size_t xy = 0; xy < XY; xy++){ | ||
288 | + for(b = 0; b < B; b++){ | ||
289 | + bip_ptr[xy * B + b] = ptr[b * XY + xy]; | ||
290 | + } | ||
291 | + } | ||
292 | + return 0; | ||
293 | + } | ||
294 | +//#endif | ||
238 | 295 | ||
239 | }; | 296 | }; |
240 | 297 |
stim/envi/bil.h
@@ -4,6 +4,7 @@ | @@ -4,6 +4,7 @@ | ||
4 | #include "../envi/envi_header.h" | 4 | #include "../envi/envi_header.h" |
5 | #include "../envi/hsi.h" | 5 | #include "../envi/hsi.h" |
6 | #include "../math/fd_coefficients.h" | 6 | #include "../math/fd_coefficients.h" |
7 | +#include <stim/cuda/cudatools/error.h> | ||
7 | #include <cstring> | 8 | #include <cstring> |
8 | #include <utility> | 9 | #include <utility> |
9 | #include <deque> | 10 | #include <deque> |
@@ -118,7 +119,7 @@ public: | @@ -118,7 +119,7 @@ public: | ||
118 | page++; | 119 | page++; |
119 | //if wavelength is larger than the last wavelength in header file | 120 | //if wavelength is larger than the last wavelength in header file |
120 | if (page == Z()) { | 121 | if (page == Z()) { |
121 | - band_index(p, Z()-1); | 122 | + band_index(p, Z()-1, PROGRESS); |
122 | return true; | 123 | return true; |
123 | } | 124 | } |
124 | } | 125 | } |
@@ -224,10 +225,44 @@ public: | @@ -224,10 +225,44 @@ public: | ||
224 | } | 225 | } |
225 | 226 | ||
226 | //given a Y ,return a XZ slice | 227 | //given a Y ,return a XZ slice |
227 | - bool read_plane_y(T * p, unsigned long long y){ | 228 | + bool read_plane_xz(T * p, size_t y){ |
228 | return binary<T>::read_plane_2(p, y); | 229 | return binary<T>::read_plane_2(p, y); |
229 | } | 230 | } |
230 | 231 | ||
232 | + //given a Y, return ZX slice (transposed such that the spectrum is the leading dimension) | ||
233 | + int read_plane_zx(T* p, size_t y){ | ||
234 | + T* temp = (T*) malloc(X() * Z() * sizeof(T)); //allocate space to store the temporary xz plane | ||
235 | + binary<T>::read_plane_2(temp, y); //load the plane from disk | ||
236 | + size_t z, x; | ||
237 | + for(z = 0; z < Z(); z++){ | ||
238 | + for(x = 0; x <= z; x++){ | ||
239 | + p[x * Z() + z] = temp[z * X() + x]; //copy to the destination frame | ||
240 | + } | ||
241 | + } | ||
242 | + } | ||
243 | + | ||
244 | + //load a frame y into a pre-allocated double-precision array | ||
245 | + int read_plane_xzd(double* f, size_t y){ | ||
246 | + size_t XB = X() * Z(); | ||
247 | + T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision | ||
248 | + if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1 | ||
249 | + for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double | ||
250 | + return 0; | ||
251 | + } | ||
252 | + | ||
253 | + //given a Y, return ZX slice (transposed such that the spectrum is the leading dimension) | ||
254 | + int read_plane_zxd(double* p, size_t y){ | ||
255 | + T* temp = (T*) malloc(X() * Z() * sizeof(T)); //allocate space to store the temporary xz plane | ||
256 | + binary<T>::read_plane_2(temp, y); //load the plane from disk | ||
257 | + size_t z, x; | ||
258 | + for(z = 0; z < Z(); z++){ | ||
259 | + for(x = 0; x < X(); x++){ | ||
260 | + p[x * Z() + z] = (double)temp[z * X() + x]; //copy to the destination frame | ||
261 | + } | ||
262 | + } | ||
263 | + return 0; | ||
264 | + } | ||
265 | + | ||
231 | 266 | ||
232 | /// Perform baseline correction given a list of baseline points and stores the result in a new BSQ file. | 267 | /// Perform baseline correction given a list of baseline points and stores the result in a new BSQ file. |
233 | 268 | ||
@@ -268,7 +303,7 @@ public: | @@ -268,7 +303,7 @@ public: | ||
268 | for (unsigned long long k =0; k < Y(); k++) | 303 | for (unsigned long long k =0; k < Y(); k++) |
269 | { | 304 | { |
270 | //get the current y slice | 305 | //get the current y slice |
271 | - read_plane_y(c, k); | 306 | + read_plane_xz(c, k); |
272 | 307 | ||
273 | //initialize lownum, highnum, low, high | 308 | //initialize lownum, highnum, low, high |
274 | ai = w[0]; | 309 | ai = w[0]; |
@@ -369,7 +404,7 @@ public: | @@ -369,7 +404,7 @@ public: | ||
369 | 404 | ||
370 | for(unsigned long long j = 0; j < Y(); j++) | 405 | for(unsigned long long j = 0; j < Y(); j++) |
371 | { | 406 | { |
372 | - read_plane_y(c, j); | 407 | + read_plane_xz(c, j); |
373 | for(unsigned long long i = 0; i < B; i++) | 408 | for(unsigned long long i = 0; i < B; i++) |
374 | { | 409 | { |
375 | for(unsigned long long m = 0; m < X(); m++) | 410 | for(unsigned long long m = 0; m < X(); m++) |
@@ -469,7 +504,7 @@ public: | @@ -469,7 +504,7 @@ public: | ||
469 | 504 | ||
470 | for ( unsigned long long i = 0; i < Y(); i++) | 505 | for ( unsigned long long i = 0; i < Y(); i++) |
471 | { | 506 | { |
472 | - read_plane_y(p, i); | 507 | + read_plane_xz(p, i); |
473 | for ( unsigned long long k = 0; k < Z(); k++) | 508 | for ( unsigned long long k = 0; k < Z(); k++) |
474 | { | 509 | { |
475 | unsigned long long ks = k * X(); | 510 | unsigned long long ks = k * X(); |
@@ -863,7 +898,7 @@ public: | @@ -863,7 +898,7 @@ public: | ||
863 | 898 | ||
864 | for (unsigned long long i = 0; i < Y(); i++) //for each value in Y() (BIP should be X) | 899 | for (unsigned long long i = 0; i < Y(); i++) //for each value in Y() (BIP should be X) |
865 | { | 900 | { |
866 | - read_plane_y(temp, i); //retrieve an ZX slice, stored in temp | 901 | + read_plane_xz(temp, i); //retrieve an ZX slice, stored in temp |
867 | for ( unsigned long long j = 0; j < Z(); j++) //for each Z() (Y) | 902 | for ( unsigned long long j = 0; j < Z(); j++) //for each Z() (Y) |
868 | { | 903 | { |
869 | for (unsigned long long k = 0; k < X(); k++) //for each band | 904 | for (unsigned long long k = 0; k < X(); k++) //for each band |
@@ -933,7 +968,7 @@ public: | @@ -933,7 +968,7 @@ public: | ||
933 | //for each slice along the y axis | 968 | //for each slice along the y axis |
934 | for (unsigned long long y = 0; y < Y(); y++) //Select a page by choosing Y coordinate, Y() | 969 | for (unsigned long long y = 0; y < Y(); y++) //Select a page by choosing Y coordinate, Y() |
935 | { | 970 | { |
936 | - read_plane_y(slice, y); //retrieve an ZX page, store in "slice" | 971 | + read_plane_xz(slice, y); //retrieve an ZX page, store in "slice" |
937 | 972 | ||
938 | //for each sample along X | 973 | //for each sample along X |
939 | for (unsigned long long x = 0; x < X(); x++) //Select a pixel by choosing X coordinate in the page, X() | 974 | for (unsigned long long x = 0; x < X(); x++) //Select a pixel by choosing X coordinate in the page, X() |
@@ -992,43 +1027,136 @@ public: | @@ -992,43 +1027,136 @@ public: | ||
992 | 1027 | ||
993 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | 1028 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum |
994 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1029 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
995 | - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){ | 1030 | + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){ |
996 | unsigned long long XZ = X() * Z(); | 1031 | unsigned long long XZ = X() * Z(); |
997 | unsigned long long XY = X() * Y(); | 1032 | unsigned long long XY = X() * Y(); |
998 | T* temp = (T*)malloc(sizeof(T) * XZ); | 1033 | T* temp = (T*)malloc(sizeof(T) * XZ); |
999 | - for (unsigned long long j = 0; j < Z(); j++){ | ||
1000 | - p[j] = 0; | ||
1001 | - } | 1034 | + memset(m, 0, Z() * sizeof(double)); //initialize the mean to zero |
1035 | + double* e_x2 = (double*)malloc(Z() * sizeof(double)); //allocate space for E[x^2] | ||
1036 | + memset(e_x2, 0, Z() * sizeof(double)); //initialize E[x^2] to zero | ||
1002 | //calculate vaild number in a band | 1037 | //calculate vaild number in a band |
1003 | - unsigned long long count = 0; | ||
1004 | - for (unsigned long long j = 0; j < XY; j++){ | ||
1005 | - if (mask == NULL || mask[j] != 0){ | ||
1006 | - count++; | ||
1007 | - } | ||
1008 | - } | 1038 | + size_t count = nnz(mask); //count the number of pixels in the mask |
1039 | + | ||
1040 | + double x; //create a register to store the pixel value | ||
1009 | for (unsigned long long k = 0; k < Y(); k++){ | 1041 | for (unsigned long long k = 0; k < Y(); k++){ |
1010 | - read_plane_y(temp, k); | 1042 | + read_plane_xz(temp, k); |
1011 | unsigned long long kx = k * X(); | 1043 | unsigned long long kx = k * X(); |
1012 | for (unsigned long long i = 0; i < X(); i++){ | 1044 | for (unsigned long long i = 0; i < X(); i++){ |
1013 | if (mask == NULL || mask[kx + i] != 0){ | 1045 | if (mask == NULL || mask[kx + i] != 0){ |
1014 | for (unsigned long long j = 0; j < Z(); j++){ | 1046 | for (unsigned long long j = 0; j < Z(); j++){ |
1015 | - p[j] += temp[j * X() + i] / (double)count; | 1047 | + x = temp[j * X() + i]; |
1048 | + m[j] += x / (double)count; | ||
1049 | + e_x2[j] += x*x / (double)count; | ||
1016 | } | 1050 | } |
1017 | } | 1051 | } |
1018 | } | 1052 | } |
1019 | if(PROGRESS) progress = (double)(k+1) / Y() * 100; | 1053 | if(PROGRESS) progress = (double)(k+1) / Y() * 100; |
1020 | } | 1054 | } |
1055 | + | ||
1056 | + for(size_t i = 0; i < Z(); i++) //calculate the standard deviation | ||
1057 | + std[i] = sqrt(e_x2[i] - m[i] * m[i]); | ||
1058 | + | ||
1021 | free(temp); | 1059 | free(temp); |
1022 | return true; | 1060 | return true; |
1023 | } | 1061 | } |
1024 | 1062 | ||
1063 | + int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | ||
1064 | + cublasStatus_t stat; | ||
1065 | + cublasHandle_t handle; | ||
1066 | + | ||
1067 | + progress = 0; //initialize the progress to zero (0) | ||
1068 | + size_t XY = X() * Y(); //calculate the number of elements in a band image | ||
1069 | + size_t XB = X() * Z(); | ||
1070 | + size_t B = Z(); //calculate the number of spectral elements | ||
1071 | + | ||
1072 | + double* F = (double*)malloc(sizeof(double) * B * X()); //allocate space for the frame that will be pulled from the file | ||
1073 | + double* F_dev; | ||
1074 | + HANDLE_ERROR(cudaMalloc(&F_dev, X() * B * sizeof(double))); //allocate space for the frame on the GPU | ||
1075 | + double* s_dev; //declare a device pointer that will store the spectrum on the GPU | ||
1076 | + double* A_dev; //declare a device pointer that will store the covariance matrix on the GPU | ||
1077 | + double* avg_dev; //declare a device pointer that will store the average spectrum | ||
1078 | + HANDLE_ERROR(cudaMalloc(&s_dev, B * sizeof(double))); //allocate space on the CUDA device for a spectrum | ||
1079 | + HANDLE_ERROR(cudaMalloc(&A_dev, B * B * sizeof(double))); //allocate space on the CUDA device for the covariance matrix | ||
1080 | + HANDLE_ERROR(cudaMemset(A_dev, 0, B * B * sizeof(double))); //initialize the covariance matrix to zero (0) | ||
1081 | + HANDLE_ERROR(cudaMalloc(&avg_dev, XB * sizeof(double))); //allocate space on the CUDA device for the average spectrum | ||
1082 | + for(size_t x = 0; x < X(); x++) //make multiple copies of the average spectrum in order to build a matrix | ||
1083 | + HANDLE_ERROR(cudaMemcpy(&avg_dev[x * B], avg, B * sizeof(double), cudaMemcpyHostToDevice)); | ||
1084 | + //stat = cublasSetVector((int)B, sizeof(double), avg, 1, avg_dev, 1); //copy the average spectrum to the CUDA device | ||
1085 | + | ||
1086 | + double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product) | ||
1087 | + double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) | ||
1088 | + | ||
1089 | + CUBLAS_HANDLE_ERROR(stat = cublasCreate(&handle)); //create a cuBLAS instance | ||
1090 | + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid | ||
1091 | + | ||
1092 | + else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl; | ||
1093 | + double beta = 1.0; | ||
1094 | + size_t x, y; | ||
1095 | + for(y = 0; y < Y(); y++){ //for each line | ||
1096 | + read_plane_zxd(F, y); //read a frame from the file | ||
1097 | + HANDLE_ERROR(cudaMemcpy(F_dev, F, XB * sizeof(double), cudaMemcpyHostToDevice)); //copy the frame to the GPU | ||
1098 | + CUBLAS_HANDLE_ERROR(cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, (int)B, (int)X(), &axpy_alpha, avg_dev, (int)B, &beta, F_dev, (int)B, F_dev, (int)B));//subtract the mean spectrum | ||
1099 | + | ||
1100 | + for(x = 0; x < X(); x++) | ||
1101 | + CUBLAS_HANDLE_ERROR(cublasDsyr(handle, CUBLAS_FILL_MODE_UPPER, (int)B, &ger_alpha, &F_dev[x*B], 1, A_dev, (int)B)); //perform an outer product | ||
1102 | + if(PROGRESS) progress = (double)(y + 1) / Y() * 100; | ||
1103 | + } | ||
1104 | + | ||
1105 | + cublasGetMatrix((int)B, (int)B, sizeof(double), A_dev, (int)B, co, (int)B); //copy the result from the GPU to the CPU | ||
1106 | + | ||
1107 | + cudaFree(A_dev); //clean up allocated device memory | ||
1108 | + cudaFree(s_dev); | ||
1109 | + cudaFree(avg_dev); | ||
1110 | + | ||
1111 | + for(unsigned long long i = 0; i < B; i++){ //copy the upper triangular portion to the lower triangular portion | ||
1112 | + for(unsigned long long j = i+1; j < B; j++){ | ||
1113 | + co[B * i + j] = co[B * j + i]; | ||
1114 | + } | ||
1115 | + } | ||
1116 | + | ||
1117 | + return 0; | ||
1118 | + | ||
1119 | + | ||
1120 | + | ||
1121 | + } | ||
1122 | + | ||
1123 | + | ||
1025 | /// Calculate the covariance matrix for all masked pixels in the image. | 1124 | /// Calculate the covariance matrix for all masked pixels in the image. |
1026 | 1125 | ||
1027 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix | 1126 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1028 | /// @param avg is a pointer to memory of size B that stores the average spectrum | 1127 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1029 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1128 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1030 | - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | 1129 | + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ |
1031 | progress = 0; | 1130 | progress = 0; |
1131 | + | ||
1132 | + if(use_gpu){ | ||
1133 | + int dev_count; | ||
1134 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | ||
1135 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1136 | + cudaDeviceProp prop; | ||
1137 | + int best_device_id = 0; //stores the best CUDA device | ||
1138 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1139 | + std::cout<<"CUDA devices:"<<std::endl; | ||
1140 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1141 | + cudaGetDeviceProperties(&prop, d); //get the property of the first device | ||
1142 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1143 | + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | ||
1144 | + if(cc > best_device_cc){ | ||
1145 | + best_device_cc = cc; //if this is better than the previous device, use it | ||
1146 | + best_device_id = d; | ||
1147 | + } | ||
1148 | + } | ||
1149 | + | ||
1150 | + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | ||
1151 | + std::cout<<"Using device "<<best_device_id<<std::endl; | ||
1152 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | ||
1153 | + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1154 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | ||
1155 | + } //otherwise continue using the CPU | ||
1156 | + | ||
1157 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | ||
1158 | + } | ||
1159 | + | ||
1032 | //memory allocation | 1160 | //memory allocation |
1033 | unsigned long long xy = X() * Y(); | 1161 | unsigned long long xy = X() * Y(); |
1034 | unsigned long long B = Z(); | 1162 | unsigned long long B = Z(); |
@@ -1325,7 +1453,7 @@ public: | @@ -1325,7 +1453,7 @@ public: | ||
1325 | c = (T*)malloc( L ); //allocate space for the slice | 1453 | c = (T*)malloc( L ); //allocate space for the slice |
1326 | 1454 | ||
1327 | for(unsigned long long j = 0; j < Y(); j++){ //for each line | 1455 | for(unsigned long long j = 0; j < Y(); j++){ //for each line |
1328 | - read_plane_y(c, j); //load the line into memory | 1456 | + read_plane_xz(c, j); //load the line into memory |
1329 | for(unsigned long long i = 0; i < B; i++){ //for each band | 1457 | for(unsigned long long i = 0; i < B; i++){ //for each band |
1330 | for(unsigned long long m = 0; m < X(); m++){ //for each sample | 1458 | for(unsigned long long m = 0; m < X(); m++){ //for each sample |
1331 | if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked | 1459 | if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked |
@@ -1355,7 +1483,7 @@ public: | @@ -1355,7 +1483,7 @@ public: | ||
1355 | c = (T*)malloc( L ); //allocate space for the slice | 1483 | c = (T*)malloc( L ); //allocate space for the slice |
1356 | 1484 | ||
1357 | for(unsigned long long j = 0; j < Y(); j++){ //for each line | 1485 | for(unsigned long long j = 0; j < Y(); j++){ //for each line |
1358 | - read_plane_y(c, j); //load the line into memory | 1486 | + read_plane_xz(c, j); //load the line into memory |
1359 | for(unsigned long long i = 0; i < B; i++){ //for each band | 1487 | for(unsigned long long i = 0; i < B; i++){ //for each band |
1360 | for(unsigned long long m = 0; m < X(); m++){ //for each sample | 1488 | for(unsigned long long m = 0; m < X(); m++){ //for each sample |
1361 | if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked | 1489 | if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked |
stim/envi/bip.h
@@ -5,13 +5,16 @@ | @@ -5,13 +5,16 @@ | ||
5 | #include "../envi/bil.h" | 5 | #include "../envi/bil.h" |
6 | #include "../envi/hsi.h" | 6 | #include "../envi/hsi.h" |
7 | #include <cstring> | 7 | #include <cstring> |
8 | +#include <complex> | ||
8 | #include <utility> | 9 | #include <utility> |
9 | 10 | ||
10 | //CUDA | 11 | //CUDA |
11 | -#ifdef CUDA_FOUND | ||
12 | - #include <cuda_runtime.h> | ||
13 | - #include "cublas_v2.h" | ||
14 | -#endif | 12 | +//#ifdef CUDA_FOUND |
13 | +#include <stim/cuda/cudatools/error.h> | ||
14 | +#include <cuda_runtime.h> | ||
15 | +#include "cublas_v2.h" | ||
16 | +#include "cufft.h" | ||
17 | +//#endif | ||
15 | 18 | ||
16 | namespace stim{ | 19 | namespace stim{ |
17 | 20 | ||
@@ -257,7 +260,7 @@ public: | @@ -257,7 +260,7 @@ public: | ||
257 | } | 260 | } |
258 | 261 | ||
259 | //given a Y index, return the ZX slice at that line (delegates to binary<T>::read_plane_2) | 262 | //given a Y index, return the ZX slice at that line (delegates to binary<T>::read_plane_2) |
260 | - bool read_plane_y(T * p, unsigned long long y){ | 263 | + bool read_plane_y(T * p, size_t y){ |
261 | return binary<T>::read_plane_2(p, y); | 264 | return binary<T>::read_plane_2(p, y); |
262 | } | 265 | } |
263 | 266 | ||
@@ -954,39 +957,43 @@ public: | @@ -954,39 +957,43 @@ public: | ||
954 | 957 | ||
955 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | 958 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum |
956 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 959 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
957 | - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){ | 960 | + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){ |
958 | unsigned long long XY = X() * Y(); //calculate the total number of pixels in the HSI | 961 | unsigned long long XY = X() * Y(); //calculate the total number of pixels in the HSI |
959 | T* temp = (T*)malloc(sizeof(T) * Z()); //allocate space for the current spectrum to be read | 962 | T* temp = (T*)malloc(sizeof(T) * Z()); //allocate space for the current spectrum to be read |
960 | - memset(p, 0, sizeof(double) * Z()); //initialize the average spectrum to zero (0) | |
961 | - //for (unsigned j = 0; j < Z(); j++){ | |
962 | - // p[j] = 0; | |
963 | - //} | 963 | + memset(m, 0, Z() * sizeof(double)); //set the mean spectrum to zero |
964 | + double* e_x2 = (double*)malloc(Z() * sizeof(double)); //allocate space for E[x^2]; NOTE(review): no free(e_x2) appears before the return below, only free(temp) | |
965 | + memset(e_x2, 0, Z() * sizeof(double)); //set all values for E[x^2] to zero | |
964 | 966 | |
965 | unsigned long long count = nnz(mask); //calculate the number of masked pixels | 967 | unsigned long long count = nnz(mask); //calculate the number of masked pixels |
966 | - | 968 | + double x; |
967 | for (unsigned long long i = 0; i < XY; i++){ //for each pixel in the HSI | 969 | for (unsigned long long i = 0; i < XY; i++){ //for each pixel in the HSI |
968 | if (mask == NULL || mask[i] != 0){ //if the pixel is masked | 970 | if (mask == NULL || mask[i] != 0){ //if the pixel is masked |
969 | pixel(temp, i); //get the spectrum | 971 | pixel(temp, i); //get the spectrum |
970 | for (unsigned long long j = 0; j < Z(); j++){ //for each spectral component | 972 | for (unsigned long long j = 0; j < Z(); j++){ //for each spectral component |
971 | - p[j] += (double)temp[j] / (double)count; //add the weighted value to the average | 973 | + x = temp[j]; |
974 | + m[j] += x / (double)count; //accumulate E[x]: add the value weighted by 1/count to the mean | |
975 | + e_x2[j] += x*x / (double)count; | |
972 | } | 976 | } |
973 | } | 977 | } |
974 | if(PROGRESS) progress = (double)(i+1) / XY * 100; //increment the progress | 978 | if(PROGRESS) progress = (double)(i+1) / XY * 100; //increment the progress |
975 | } | 979 | } |
976 | 980 | |
981 | + //calculate the standard deviation of each band as sqrt(E[x^2] - E[x]^2) and store it in std | |
982 | + for(size_t i = 0; i < Z(); i++) | |
983 | + std[i] = sqrt(e_x2[i] - m[i] * m[i]); | |
984 | + | |
977 | free(temp); | 985 | free(temp); |
978 | return true; | 986 | return true; |
979 | } | 987 | } |
980 | -#ifdef CUDA_FOUND | 988 | +//#ifdef CUDA_FOUND |
981 | /// Calculate the covariance matrix for masked pixels using cuBLAS | 989 | /// Calculate the covariance matrix for masked pixels using cuBLAS |
982 | /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra | 990 | /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra |
983 | - bool co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | 991 | + int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ |
984 | 992 | ||
985 | cudaError_t cudaStat; | 993 | cudaError_t cudaStat; |
986 | cublasStatus_t stat; | 994 | cublasStatus_t stat; |
987 | cublasHandle_t handle; | 995 | cublasHandle_t handle; |
988 | 996 | ||
989 | - progress = 0; //initialize the progress to zero (0) | ||
990 | unsigned long long XY = X() * Y(); //calculate the number of elements in a band image | 997 | unsigned long long XY = X() * Y(); //calculate the number of elements in a band image |
991 | unsigned long long B = Z(); //calculate the number of spectral elements | 998 | unsigned long long B = Z(); //calculate the number of spectral elements |
992 | 999 | ||
@@ -1004,10 +1011,9 @@ public: | @@ -1004,10 +1011,9 @@ public: | ||
1004 | double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) | 1011 | double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) |
1005 | 1012 | ||
1006 | stat = cublasCreate(&handle); //create a cuBLAS instance | 1013 | stat = cublasCreate(&handle); //create a cuBLAS instance |
1007 | - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid | ||
1008 | - printf ("CUBLAS initialization failed\n"); | ||
1009 | - return EXIT_FAILURE; | ||
1010 | - } | 1014 | + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid |
1015 | + | ||
1016 | + else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl; | ||
1011 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel | 1017 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel |
1012 | if (mask == NULL || mask[xy] != 0){ | 1018 | if (mask == NULL || mask[xy] != 0){ |
1013 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location | 1019 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location |
@@ -1031,26 +1037,45 @@ public: | @@ -1031,26 +1037,45 @@ public: | ||
1031 | } | 1037 | } |
1032 | } | 1038 | } |
1033 | 1039 | ||
1034 | - return true; | 1040 | + return 0; |
1035 | } | 1041 | } |
1036 | -#endif | 1042 | +//#endif |
1037 | 1043 | ||
1038 | /// Calculate the covariance matrix for all masked pixels in the image with 64-bit floating point precision. | 1044 | /// Calculate the covariance matrix for all masked pixels in the image with 64-bit floating point precision. |
1039 | 1045 | ||
1040 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix | 1046 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1041 | /// @param avg is a pointer to memory of size B that stores the average spectrum | 1047 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1042 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1048 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1043 | - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | ||
1044 | - | ||
1045 | -#ifdef CUDA_FOUND | ||
1046 | - int dev_count; | ||
1047 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | ||
1048 | - cudaDeviceProp prop; | ||
1049 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | ||
1050 | - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator | ||
1051 | - return co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1052 | -#endif | 1049 | + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ |
1053 | progress = 0; | 1050 | progress = 0; |
1051 | + | ||
1052 | + if(use_gpu){ | ||
1053 | + int dev_count; | ||
1054 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | ||
1055 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1056 | + cudaDeviceProp prop; | ||
1057 | + int best_device_id = 0; //stores the best CUDA device | ||
1058 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1059 | + std::cout<<"CUDA devices----"<<std::endl; | ||
1060 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1061 | + cudaGetDeviceProperties(&prop, d); //get the property of the first device | ||
1062 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1063 | + std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information | ||
1064 | + if(cc > best_device_cc){ | ||
1065 | + best_device_cc = cc; //if this is better than the previous device, use it | ||
1066 | + best_device_id = d; | ||
1067 | + } | ||
1068 | + } | ||
1069 | + | ||
1070 | + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | ||
1071 | + std::cout<<"Using device "<<best_device_id<<std::endl; | ||
1072 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | ||
1073 | + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1074 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | ||
1075 | + } //otherwise continue using the CPU | ||
1076 | + | ||
1077 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | ||
1078 | + } | ||
1054 | //memory allocation | 1079 | //memory allocation |
1055 | unsigned long long XY = X() * Y(); | 1080 | unsigned long long XY = X() * Y(); |
1056 | unsigned long long B = Z(); | 1081 | unsigned long long B = Z(); |
@@ -1092,10 +1117,10 @@ public: | @@ -1092,10 +1117,10 @@ public: | ||
1092 | } | 1117 | } |
1093 | 1118 | ||
1094 | 1119 | ||
1095 | -#ifdef CUDA_FOUND | 1120 | +//#ifdef CUDA_FOUND |
1096 | /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS | 1121 | /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS |
1097 | /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra | 1122 | /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra |
1098 | - bool coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | 1123 | + int coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1099 | 1124 | ||
1100 | cudaError_t cudaStat; | 1125 | cudaError_t cudaStat; |
1101 | cublasStatus_t stat; | 1126 | cublasStatus_t stat; |
@@ -1123,11 +1148,9 @@ public: | @@ -1123,11 +1148,9 @@ public: | ||
1123 | double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product) | 1148 | double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product) |
1124 | double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) | 1149 | double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) |
1125 | 1150 | ||
1126 | - stat = cublasCreate(&handle); //create a cuBLAS instance | ||
1127 | - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid | ||
1128 | - printf ("CUBLAS initialization failed\n"); | ||
1129 | - return EXIT_FAILURE; | ||
1130 | - } | 1151 | + CUBLAS_HANDLE_ERROR(cublasCreate(&handle)); //create a cuBLAS instance |
1152 | + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid | ||
1153 | + | ||
1131 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel | 1154 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel |
1132 | if (mask == NULL || mask[xy] != 0){ | 1155 | if (mask == NULL || mask[xy] != 0){ |
1133 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location | 1156 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location |
@@ -1158,27 +1181,44 @@ public: | @@ -1158,27 +1181,44 @@ public: | ||
1158 | } | 1181 | } |
1159 | } | 1182 | } |
1160 | 1183 | ||
1161 | - return true; | 1184 | + return 0; |
1162 | } | 1185 | } |
1163 | -#endif | 1186 | +//#endif |
1164 | 1187 | ||
1165 | /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision. | 1188 | /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision. |
1166 | 1189 | ||
1167 | /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix | 1190 | /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1168 | /// @param avg is a pointer to memory of size B that stores the average spectrum | 1191 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1169 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1192 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1170 | - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | ||
1171 | - | ||
1172 | -#ifdef CUDA_FOUND | ||
1173 | - int dev_count; | ||
1174 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | ||
1175 | - cudaDeviceProp prop; | ||
1176 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | ||
1177 | - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator | ||
1178 | - return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1179 | -#endif | ||
1180 | - | ||
1181 | - | 1193 | + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ |
1194 | + | ||
1195 | + if(use_gpu){ | ||
1196 | + int dev_count; | ||
1197 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | ||
1198 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1199 | + cudaDeviceProp prop; | ||
1200 | + int best_device_id = 0; //stores the best CUDA device | ||
1201 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1202 | + std::cout<<"CUDA devices:"<<std::endl; | ||
1203 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1204 | + cudaGetDeviceProperties(&prop, d); //get the property of the first device | ||
1205 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1206 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | ||
1207 | + if(cc > best_device_cc){ | ||
1208 | + best_device_cc = cc; //if this is better than the previous device, use it | ||
1209 | + best_device_id = d; | ||
1210 | + } | ||
1211 | + } | ||
1212 | + | ||
1213 | + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | ||
1214 | + std::cout<<"Using device "<<best_device_id<<std::endl; | ||
1215 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | ||
1216 | + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1217 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | ||
1218 | + } //otherwise continue using the CPU | ||
1219 | + | ||
1220 | + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | ||
1221 | + } | ||
1182 | 1222 | ||
1183 | progress = 0; | 1223 | progress = 0; |
1184 | //memory allocation | 1224 | //memory allocation |
@@ -1443,7 +1483,7 @@ public: | @@ -1443,7 +1483,7 @@ public: | ||
1443 | unsigned long long jump_sample = ( (Z() - b1) + b0 ) * sizeof(T); | 1483 | unsigned long long jump_sample = ( (Z() - b1) + b0 ) * sizeof(T); |
1444 | 1484 | ||
1445 | //distance between sample spectra in adjacent lines | 1485 | //distance between sample spectra in adjacent lines |
1446 | - unsigned long long jump_line = (X() - x1) * Z() * sizeof(T); | 1486 | + unsigned long long jump_line = ( X() - x1 + x0 ) * Z() * sizeof(T); |
1447 | 1487 | ||
1448 | 1488 | ||
1449 | //unsigned long long sp = y0 * X() + x0; //start pixel | 1489 | //unsigned long long sp = y0 * X() + x0; //start pixel |
@@ -1682,7 +1722,117 @@ public: | @@ -1682,7 +1722,117 @@ public: | ||
1682 | return true; | 1722 | return true; |
1683 | } | 1723 | } |
1684 | 1724 | ||
1725 | + int fft(std::string outname, size_t bandmin, size_t bandmax, size_t samples = 0, T* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int device = 0){ | |
1726 | + if(device == -1){ | |
1727 | + std::cout<<"ERROR: GPU required for FFT (uses cuFFT)."<<std::endl; | |
1728 | + exit(1); | |
1729 | + } | |
1730 | + if(samples == 0) samples = Z(); //if the number of samples is not specified (0), use all Z() samples | |
1731 | + if(samples > Z()){ | |
1732 | + std::cout<<"ERROR: stim::envi doesn't support FFT padding just yet."<<std::endl; | |
1733 | + exit(1); | |
1734 | + } | |
1735 | + int nd; //stores the number of CUDA devices | |
1736 | + HANDLE_ERROR(cudaGetDeviceCount(&nd)); //get the number of CUDA devices | |
1737 | + if(device >= nd){ //test for the existence of the requested device | |
1738 | + std::cout<<"ERROR: requested CUDA device for stim::envi::fft() doesn't exist"<<std::endl; | |
1739 | + exit(1); | |
1740 | + } | |
1741 | + HANDLE_ERROR(cudaSetDevice(device)); //set the CUDA device | |
1742 | + cudaDeviceProp prop; | |
1743 | + HANDLE_ERROR(cudaGetDeviceProperties(&prop, device)); //get the CUDA device properties | |
1744 | + | |
1745 | + size_t B = Z(); | |
1746 | + size_t S = samples; | |
1747 | + size_t fft_size = S * sizeof(T); //number of bytes in the input of each FFT (S values of type T) | |
1748 | + size_t cuda_bytes = prop.totalGlobalMem; //total global memory reported for the device (totalGlobalMem, not the currently free amount) | |
1749 | + size_t cuda_use = (size_t)floor(cuda_bytes * 0.2); //use 20% of the total device memory (factor is 0.2); NOTE(review): this comment previously said "only use 80%" - confirm the intended fraction | |
1750 | + size_t nS = cuda_use / fft_size; //calculate the number of spectra that can be loaded onto the GPU as a single batch | |
1751 | + size_t batch_bytes = nS * fft_size; //calculate the size of a batch (in bytes) | |
1752 | + size_t fft_bytes = nS * (S/2 + 1) * sizeof(cufftComplex); | |
1753 | + T* batch = (T*) malloc(batch_bytes); //allocate space in host memory to store a batch | |
1754 | + memset(batch, 0, batch_bytes); | |
1755 | + std::complex<T>* batch_fft = (std::complex<T>*) malloc(fft_bytes); | |
1756 | + T* gpu_batch; //device pointer to the batch | |
1757 | + HANDLE_ERROR(cudaMalloc(&gpu_batch, batch_bytes)); //allocate space on the device for the FFT batch | |
1758 | + cufftComplex* gpu_batch_fft; //device pointer that receives the FFT result (S/2 + 1 complex values per spectrum) | |
1759 | + HANDLE_ERROR(cudaMalloc(&gpu_batch_fft, fft_bytes)); | |
1760 | + int N[1]; //create an array with the interferogram size (required for cuFFT input) | |
1761 | + N[0] = (int)S; //set the only array value to the interferogram size | |
1762 | + | |
1763 | + //if a background is provided for a ratio | |
1764 | + std::complex<T>* ratio_fft = NULL; //create a pointer for the FFT of the ratio image (if it exists) | |
1765 | + if(ratio){ | |
1766 | + size_t bkg_bytes = rx * ry * S * sizeof(T); //calculate the total number of bytes in the background image | |
1767 | + T* bkg_copy = (T*) malloc(bkg_bytes); //allocate space to copy the background; NOTE(review): no free(bkg_copy) appears anywhere in this function | |
1768 | + if(S == Z()) memcpy(bkg_copy, ratio, bkg_bytes); //if the number of samples used in processing equals the number of available samples | |
1769 | + else{ | |
1770 | + for(size_t xyi = 0; xyi < rx*ry; xyi++) | |
1771 | + memcpy(&bkg_copy[xyi * S], &ratio[xyi * B], S * sizeof(T)); | |
1772 | + } | |
1773 | + T* gpu_ratio; | |
1774 | + HANDLE_ERROR(cudaMalloc(&gpu_ratio, bkg_bytes)); | |
1775 | + HANDLE_ERROR(cudaMemcpy(gpu_ratio, bkg_copy, bkg_bytes, cudaMemcpyHostToDevice)); | |
1776 | + cufftHandle bkg_plan; | |
1777 | + CUFFT_HANDLE_ERROR(cufftPlanMany(&bkg_plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)(rx * ry))); | |
1778 | + size_t bkg_fft_bytes = rx * ry * (S / 2 + 1) * sizeof(cufftComplex); | |
1779 | + T* gpu_ratio_fft; | |
1780 | + HANDLE_ERROR(cudaMalloc(&gpu_ratio_fft, bkg_fft_bytes)); | |
1781 | + CUFFT_HANDLE_ERROR(cufftExecR2C(bkg_plan, (cufftReal*)gpu_ratio, (cufftComplex*)gpu_ratio_fft)); | |
1782 | + ratio_fft = (std::complex<T>*) malloc(bkg_fft_bytes); | |
1783 | + HANDLE_ERROR(cudaMemcpy(ratio_fft, gpu_ratio_fft, bkg_fft_bytes, cudaMemcpyDeviceToHost)); | |
1784 | + HANDLE_ERROR(cudaFree(gpu_ratio)); | |
1785 | + HANDLE_ERROR(cudaFree(gpu_ratio_fft)); | |
1786 | + CUFFT_HANDLE_ERROR(cufftDestroy(bkg_plan)); | |
1787 | + } | |
1685 | 1788 | |
1789 | + cufftHandle plan; //create a CUFFT plan for batches of nS spectra; NOTE(review): unlike bkg_plan, no cufftDestroy(plan) appears in this function | |
1790 | + CUFFT_HANDLE_ERROR(cufftPlanMany(&plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)nS)); | |
1791 | + | |
1792 | + std::ofstream outfile(outname, std::ios::binary); //open a file for writing | |
1793 | + | |
1794 | + size_t XY = X() * Y(); //calculate the number of spectra | |
1795 | + size_t xy = 0; | |
1796 | + size_t bs; //stores the number of spectra in the current batch | |
1797 | + size_t s, b; | |
1798 | + size_t S_fft = S/2 + 1; | |
1799 | + size_t bandkeep = bandmax - bandmin + 1; | |
1800 | + size_t x, y; | |
1801 | + size_t ratio_i; | |
1802 | + T* temp_spec = (T*) malloc(Z() * sizeof(T)); //allocate space to hold a single pixel; NOTE(review): no free(temp_spec) appears before the return | |
1803 | + while(xy < XY){ //while there are unprocessed spectra | |
1804 | + bs = min(XY - xy, nS); //calculate the number of spectra to include in the batch | |
1805 | + for(s = 0; s < bs; s++){ //for each spectrum in the batch | |
1806 | + pixel(temp_spec, xy + s); //read a pixel from disk | |
1807 | + memcpy(&batch[s * S], temp_spec, S * sizeof(T)); | |
1808 | + //pixel(&batch[s * S], xy + s); //read the next spectrum | |
1809 | + } | |
1810 | + HANDLE_ERROR(cudaMemcpy(gpu_batch, batch, batch_bytes, cudaMemcpyHostToDevice)); | |
1811 | + CUFFT_HANDLE_ERROR(cufftExecR2C(plan, (cufftReal*)gpu_batch, gpu_batch_fft)); //execute the (implicitly forward) transform | |
1812 | + HANDLE_ERROR(cudaMemcpy(batch_fft, gpu_batch_fft, fft_bytes, cudaMemcpyDeviceToHost)); //copy the FFT result from the GPU back to the host | |
1813 | + for(s = 0; s < bs; s++){ //for each spectrum in the batch | |
1814 | + y = (xy + s)/X(); | |
1815 | + x = xy + s - y * X(); | |
1816 | + if(ratio_fft) ratio_i = (y % ry) * rx + (x % rx); //if a background is used, calculate the coordinates into it | |
1817 | + for(b = 0; b < S/2 + 1; b++){ //for each of the S/2 + 1 frequency samples of the transform | |
1818 | + if(ratio_fft) | |
1819 | + batch[s * S + b] = -log(abs(batch_fft[s * S_fft + b]) / abs(ratio_fft[ratio_i * S_fft + b])); | |
1820 | + else | |
1821 | + batch[s * S + b] = abs(batch_fft[s * S_fft + b]); //calculate the magnitude of the spectrum | |
1822 | + } | |
1823 | + outfile.write((char*)&batch[s * S + bandmin], bandkeep * sizeof(T)); //save bands bandmin..bandmax of the resulting spectrum; NOTE(review): bandmax is not checked against S/2 + 1 here | |
1824 | + } | |
1825 | + xy += bs; //increment xy by the number of spectra processed | |
1826 | + if(PROGRESS) progress = (double)xy / (double)XY * 100; | |
1827 | + } | |
1828 | + outfile.close(); | |
1829 | + free(ratio_fft); | |
1830 | + free(batch_fft); | |
1831 | + free(batch); | |
1832 | + HANDLE_ERROR(cudaFree(gpu_batch)); | |
1833 | + HANDLE_ERROR(cudaFree(gpu_batch_fft)); | |
1834 | + return 0; | |
1835 | + } | |
1686 | 1836 | ||
1687 | /// Close the file. | 1837 | /// Close the file. |
1688 | bool close(){ | 1838 | bool close(){ |
stim/envi/bsq.h
@@ -104,6 +104,7 @@ public: | @@ -104,6 +104,7 @@ public: | ||
104 | //if wavelength is smaller than the first one in header file | 104 | //if wavelength is smaller than the first one in header file |
105 | if ( w[page] > wavelength ){ | 105 | if ( w[page] > wavelength ){ |
106 | band_index(p, page); | 106 | band_index(p, page); |
107 | + if(PROGRESS) progress = 100; | ||
107 | return true; | 108 | return true; |
108 | } | 109 | } |
109 | 110 | ||
@@ -114,6 +115,7 @@ public: | @@ -114,6 +115,7 @@ public: | ||
114 | // (the wavelength is out of bounds) | 115 | // (the wavelength is out of bounds) |
115 | if (page == Z()) { | 116 | if (page == Z()) { |
116 | band_index(p, Z()-1); //return the last band | 117 | band_index(p, Z()-1); //return the last band |
118 | + if(PROGRESS) progress = 100; | ||
117 | return true; | 119 | return true; |
118 | } | 120 | } |
119 | } | 121 | } |
@@ -561,12 +563,12 @@ public: | @@ -561,12 +563,12 @@ public: | ||
561 | free(src[1]); | 563 | free(src[1]); |
562 | free(dst[0]); | 564 | free(dst[0]); |
563 | free(dst[1]); | 565 | free(dst[1]); |
564 | - //if(VERBOSE){ | 566 | + if(VERBOSE){ |
565 | std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl; | 567 | std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl; |
566 | std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; | 568 | std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; |
567 | std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; | 569 | std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; |
568 | std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; | 570 | std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; |
569 | - //} | 571 | + } |
570 | return true; //return true | 572 | return true; //return true |
571 | } | 573 | } |
572 | 574 | ||
@@ -1120,27 +1122,61 @@ public: | @@ -1120,27 +1122,61 @@ public: | ||
1120 | 1122 | ||
1121 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | 1123 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum |
1122 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1124 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1123 | - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){ | 1125 | + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){ |
1124 | unsigned long long XY = X() * Y(); | 1126 | unsigned long long XY = X() * Y(); |
1125 | - unsigned long long count = 0; //count will store the number of masked pixels | 1127 | + unsigned long long count = nnz(mask); //count will store the number of masked pixels |
1126 | T* temp = (T*)malloc(sizeof(T) * XY); | 1128 | T* temp = (T*)malloc(sizeof(T) * XY); |
1127 | - //calculate this loop counts the number of true pixels in the mask | ||
1128 | - for (unsigned j = 0; j < XY; j++){ | ||
1129 | - if (mask == NULL || mask[j] != 0){ | ||
1130 | - count++; | ||
1131 | - } | ||
1132 | - } | 1129 | + |
1133 | //this loops goes through each band in B (Z()) | 1130 | //this loops goes through each band in B (Z()) |
1134 | // masked (or valid) pixels from that band are averaged and the average is stored in p | 1131 | // masked (or valid) pixels from that band are averaged and the average is stored in p |
1132 | + double e_x; //stores E[x]^2 | ||
1133 | + double e_x2; //stores E[x^2] | ||
1134 | + double x; | ||
1135 | for (unsigned long long i = 0; i < Z(); i++){ | 1135 | for (unsigned long long i = 0; i < Z(); i++){ |
1136 | - p[i] = 0; | 1136 | + e_x = 0; |
1137 | + e_x2 = 0; | ||
1137 | band_index(temp, i); //get the band image and store it in temp | 1138 | band_index(temp, i); //get the band image and store it in temp |
1138 | for (unsigned long long j = 0; j < XY; j++){ //loop through temp, averaging valid pixels | 1139 | for (unsigned long long j = 0; j < XY; j++){ //loop through temp, averaging valid pixels |
1139 | if (mask == NULL || mask[j] != 0){ | 1140 | if (mask == NULL || mask[j] != 0){ |
1140 | - p[i] += (double)temp[j] / (double)count; | 1141 | + x = (double)temp[j]; |
1142 | + e_x += x / (double)count; //sum the expected value of x | ||
1143 | + e_x2 += (x * x) / (double)count; //sum the expected value of x^2 | ||
1141 | } | 1144 | } |
1142 | } | 1145 | } |
1143 | - if(PROGRESS) progress = (double)(i+1) / Z() * 100; | 1146 | + m[i] = e_x; //store the mean |
1147 | + std[i] = sqrt(e_x2 - e_x * e_x); //calculate the standard deviation | ||
1148 | + if(PROGRESS) progress = (double)(i+1) / Z() * 100; //update the progress counter | ||
1149 | + } | ||
1150 | + free(temp); | ||
1151 | + return true; | ||
1152 | + } | ||
1153 | + | ||
1154 | + /// Calculate the median value for all masked (or valid) pixels in a band and returns the median spectrum | ||
1155 | + | ||
1156 | + /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | ||
1157 | + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | ||
1158 | + bool median_spectrum(double* m, unsigned char* mask = NULL, bool PROGRESS = false){ | ||
1159 | + size_t XY = X() * Y(); | ||
1160 | + size_t count = nnz(mask); //count will store the number of masked pixels | ||
1161 | + T* temp = (T*)malloc(sizeof(T) * XY); | ||
1162 | + | ||
1163 | + std::vector<T> band_values(count); //create an STD vector of band values | ||
1164 | + | ||
1165 | + //this loops goes through each band in B (Z()) | ||
1166 | + // masked (or valid) pixels from that band are averaged and the average is stored in p | ||
1167 | + size_t k; | ||
1168 | + for (size_t i = 0; i < Z(); i++){ //for each band | ||
1169 | + band_index(temp, i); //get the band image and store it in temp | ||
1170 | + k = 0; //initialize the band_value index to zero | ||
1171 | + for (size_t j = 0; j < XY; j++){ //loop through temp, averaging valid pixels | ||
1172 | + if (mask == NULL || mask[j] != 0){ | ||
1173 | + band_values[k] = temp[j]; //store the value in the band_values array | ||
1174 | + k++; //increment the band_values index | ||
1175 | + } | ||
1176 | + } | ||
1177 | + std::sort(band_values.begin(), band_values.end()); //sort all of the values in the band | ||
1178 | + m[i] = band_values[ count/2 ]; //store the center value in the array | ||
1179 | + if(PROGRESS) progress = (double)(i+1) / Z() * 100; //update the progress counter | ||
1144 | } | 1180 | } |
1145 | free(temp); | 1181 | free(temp); |
1146 | return true; | 1182 | return true; |
@@ -1203,6 +1239,52 @@ public: | @@ -1203,6 +1239,52 @@ public: | ||
1203 | return true; | 1239 | return true; |
1204 | } | 1240 | } |
1205 | 1241 | ||
1242 | +	///Crop out several subimages and assemble a new image from these concatenated subimages | |
1243 | + | |
1244 | +	/// @param outfile is the file name for the output image | |
1245 | +	/// @param sx is the width of each subimage | |
1246 | +	/// @param sy is the height of each subimage | |
1247 | +	/// @param mask is the mask used to define subimage positions: each non-zero pixel marks the center of one subimage | |
1248 | +	/// @param PROGRESS if true, the progress counter is updated while the subimages are extracted | |
1249 | +	void subimages(std::string outfile, size_t sx, size_t sy, unsigned char* mask, bool PROGRESS = false){ | |
1250 | +		const size_t XY = X() * Y();							//number of pixels in one band image | |
1251 | +		size_t N = nnz(mask);									//get the number of subimages | |
1252 | +		T* dst = (T*) malloc(N * sx * sy * sizeof(T));			//allocate space for a single band of the output image | |
1253 | +		memset(dst, 0, N*sx*sy*sizeof(T));						//initialize the band image to zero | |
1254 | +		std::ofstream out(outfile, std::ios::binary);			//open a file for writing | |
1255 | +		T* src = (T*) malloc(XY * sizeof(T));					//allocate space for one band of the input image | |
1256 | + | |
1257 | +		for(size_t b = 0; b < Z(); b++){						//for each band | |
1258 | +			band_index(src, b);									//load the band image | |
1259 | +			size_t i = 0;										//create an image index and initialize it to zero | |
1260 | +			size_t n = 0;										//number of subimages copied so far for this band | |
1261 | +			//masked pixels too close to the border are skipped without incrementing n, so the scan must also stop at the end of the mask | |
1262 | +			while(n < N && i < XY){								//for each subimage (their unused slots in dst stay zero) | |
1263 | +				if(mask[i]){									//if the pixel is masked, copy the surrounding pixels into the destination band | |
1264 | +					size_t yi = i / X();						//determine the y position of the current pixel | |
1265 | +					size_t xi = i - yi * X();					//determine the x position of the current pixel | |
1266 | +					if( xi > sx/2 && xi < X() - sx/2 &&			//if the subimage is completely within the bounds of the original image | |
1267 | +						yi > sy/2 && yi < Y() - sy/2){ | |
1268 | +						size_t cx = xi - sx/2;					//calculate the corner position for the subimage | |
1269 | +						size_t cy = yi - sy/2; | |
1270 | +						for(size_t syi = 0; syi < sy; syi++){	//for each line in the subimage | |
1271 | +							size_t src_i = (cy + syi) * X() + cx; | |
1272 | +							size_t dst_i = (n * sy + syi) * sx;	//subimages are stacked along y: subimage n starts at line n * sy | |
1273 | +							memcpy(&dst[dst_i], &src[src_i], sx * sizeof(T));	//copy one line from the subimage to the destination image | |
1274 | +						} | |
1275 | +						n++; | |
1276 | +					} | |
1277 | +				} | |
1278 | +				i++; | |
1279 | +				if(PROGRESS) progress = (double)(b * N + n) / (double)(N * Z()) * 100;	//fraction of all (band, subimage) pairs completed | |
1280 | +			}//end while n | |
1281 | +			out.write((const char*)dst, N * sx * sy * sizeof(T));	//write the band to the output file | |
1282 | +		} | |
1283 | +		if(PROGRESS) progress = 100;							//report completion even if some subimages were skipped | |
1284 | +		free(dst);												//free memory | |
1285 | +		free(src); | |
1286 | +	} | |
1287 | + | ||
1206 | /// Remove a list of bands from the ENVI file | 1288 | /// Remove a list of bands from the ENVI file |
1207 | 1289 | ||
1208 | /// @param outfile is the file name for the output hyperspectral image (with trimmed bands) | 1290 | /// @param outfile is the file name for the output hyperspectral image (with trimmed bands) |
stim/envi/envi.h
@@ -6,6 +6,8 @@ | @@ -6,6 +6,8 @@ | ||
6 | #include "../envi/bip.h" | 6 | #include "../envi/bip.h" |
7 | #include "../envi/bil.h" | 7 | #include "../envi/bil.h" |
8 | #include "../math/fd_coefficients.h" | 8 | #include "../math/fd_coefficients.h" |
9 | +#include <stim/parser/filename.h> | ||
10 | +#include <stim/util/filesize.h> | ||
9 | #include <iostream> | 11 | #include <iostream> |
10 | #include <fstream> | 12 | #include <fstream> |
11 | //#include "../image/image.h" | 13 | //#include "../image/image.h" |
@@ -76,7 +78,31 @@ public: | @@ -76,7 +78,31 @@ public: | ||
76 | 78 | ||
77 | allocate(); | 79 | allocate(); |
78 | } | 80 | } |
81 | +	//conversion to bool: an ENVI object tests true only when a file object has been allocated | |
82 | +	operator bool(){ | |
83 | +		const bool has_file = (file != NULL);		//a NULL file pointer means there is nothing to operate on | |
84 | +		return has_file; | |
85 | +	} | |
86 | + | ||
87 | +	//returns true if the header loads and the data file has exactly the number of bytes that the header describes | |
88 | +	static bool is_envi(std::string fname, std::string hname = ""){ | |
89 | +		stim::filename data_path(fname); | |
90 | +		stim::filename header_path; | |
91 | +		if(hname != "") | |
92 | +			header_path = hname;							//an explicit header name was passed, so use it | |
93 | +		else{												//otherwise derive the header name from the data file | |
94 | +			header_path = data_path;						//same name as the data file... | |
95 | +			header_path = header_path.extension("hdr");		//...with a .hdr extension | |
96 | +		} | |
97 | + | |
98 | +		stim::envi_header hdr; | |
99 | +		if(!hdr.load(header_path)) return false;			//a header that doesn't load means this isn't an ENVI file | |
100 | +		const size_t expected_bytes = hdr.data_bytes();		//number of bytes that SHOULD be in the data file | |
101 | +		const size_t actual_bytes = stim::file_size(fname);	//number of bytes that ARE in the data file | |
102 | +		//the file is accepted only when the data size matches the header | |
103 | +		return actual_bytes == expected_bytes; | |
79 | | 104 | |
105 | +	} | |
80 | 106 | ||
81 | 107 | ||
82 | void* malloc_spectrum(){ | 108 | void* malloc_spectrum(){ |
@@ -359,11 +385,23 @@ public: | @@ -359,11 +385,23 @@ public: | ||
359 | 385 | ||
360 | fseek(f, 9, SEEK_SET); //seek to the number of bands | 386 | fseek(f, 9, SEEK_SET); //seek to the number of bands |
361 | short b; //allocate space for the number of bands | 387 | short b; //allocate space for the number of bands |
362 | - fread(&b, sizeof(short), 1, f); //read the number of bands | 388 | + size_t nread = fread(&b, sizeof(short), 1, f); //read the number of bands |
389 | + if(nread != 1){ | ||
390 | + std::cout<<"Error reading band number from Agilent file."<<std::endl; | ||
391 | + exit(1); | ||
392 | + } | ||
363 | fseek(f, 13, SEEK_CUR); //skip the the x and y dimensions | 393 | fseek(f, 13, SEEK_CUR); //skip the the x and y dimensions |
364 | short x, y; | 394 | short x, y; |
365 | - fread(&x, sizeof(short), 1, f); //read the image x and y size | ||
366 | - fread(&y, sizeof(short), 1, f); | 395 | + nread = fread(&x, sizeof(short), 1, f); //read the image x and y size |
396 | + if(nread != 1){ | ||
397 | + std::cout<<"Error reading X dimension from Agilent file."<<std::endl; | ||
398 | + exit(1); | ||
399 | + } | ||
400 | + nread = fread(&y, sizeof(short), 1, f); | ||
401 | + if(nread != 1){ | ||
402 | + std::cout<<"Error reading Y dimension from Agilent file."<<std::endl; | ||
403 | + exit(1); | ||
404 | + } | ||
367 | fclose(f); //close the file | 405 | fclose(f); //close the file |
368 | 406 | ||
369 | //store the information from the Agilent header in the ENVI header | 407 | //store the information from the Agilent header in the ENVI header |
@@ -1368,12 +1406,12 @@ public: | @@ -1368,12 +1406,12 @@ public: | ||
1368 | 1406 | ||
1369 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | 1407 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum |
1370 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1408 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1371 | - bool avg_band(double * p, unsigned char* mask, bool PROGRESS = false){ | 1409 | + bool mean_spectrum(double * p, double* std, unsigned char* mask, bool PROGRESS = false){ |
1372 | if (header.interleave == envi_header::BSQ){ | 1410 | if (header.interleave == envi_header::BSQ){ |
1373 | if (header.data_type == envi_header::float32) | 1411 | if (header.data_type == envi_header::float32) |
1374 | - return ((bsq<float>*)file)->avg_band(p, mask, PROGRESS); | 1412 | + return ((bsq<float>*)file)->mean_spectrum(p, std, mask, PROGRESS); |
1375 | else if (header.data_type == envi_header::float64) | 1413 | else if (header.data_type == envi_header::float64) |
1376 | - return ((bsq<double>*)file)->avg_band(p, mask, PROGRESS); | 1414 | + return ((bsq<double>*)file)->mean_spectrum(p, std, mask, PROGRESS); |
1377 | else{ | 1415 | else{ |
1378 | std::cout << "ERROR: unidentified data type" << std::endl; | 1416 | std::cout << "ERROR: unidentified data type" << std::endl; |
1379 | exit(1); | 1417 | exit(1); |
@@ -1381,9 +1419,9 @@ public: | @@ -1381,9 +1419,9 @@ public: | ||
1381 | } | 1419 | } |
1382 | else if (header.interleave == envi_header::BIL){ | 1420 | else if (header.interleave == envi_header::BIL){ |
1383 | if (header.data_type == envi_header::float32) | 1421 | if (header.data_type == envi_header::float32) |
1384 | - return ((bil<float>*)file)->avg_band(p, mask, PROGRESS); | 1422 | + return ((bil<float>*)file)->mean_spectrum(p, std, mask, PROGRESS); |
1385 | else if (header.data_type == envi_header::float64) | 1423 | else if (header.data_type == envi_header::float64) |
1386 | - return ((bil<double>*)file)->avg_band(p, mask, PROGRESS); | 1424 | + return ((bil<double>*)file)->mean_spectrum(p, std, mask, PROGRESS); |
1387 | else{ | 1425 | else{ |
1388 | std::cout << "ERROR: unidentified data type" << std::endl; | 1426 | std::cout << "ERROR: unidentified data type" << std::endl; |
1389 | exit(1); | 1427 | exit(1); |
@@ -1391,14 +1429,36 @@ public: | @@ -1391,14 +1429,36 @@ public: | ||
1391 | } | 1429 | } |
1392 | else if (header.interleave == envi_header::BIP){ | 1430 | else if (header.interleave == envi_header::BIP){ |
1393 | if (header.data_type == envi_header::float32) | 1431 | if (header.data_type == envi_header::float32) |
1394 | - return ((bip<float>*)file)->avg_band(p, mask, PROGRESS); | 1432 | + return ((bip<float>*)file)->mean_spectrum(p, std, mask, PROGRESS); |
1433 | + else if (header.data_type == envi_header::float64) | ||
1434 | + return ((bip<double>*)file)->mean_spectrum(p, std, mask, PROGRESS); | ||
1435 | + else{ | ||
1436 | + std::cout << "ERROR: unidentified data type" << std::endl; | ||
1437 | + exit(1); | ||
1438 | + } | ||
1439 | + } | ||
1440 | + return false; | ||
1441 | + } | ||
1442 | + | ||
1443 | + /// Calculate the mean value for all masked (or valid) pixels in a band and returns the average spectrum | ||
1444 | + | ||
1445 | + /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | ||
1446 | + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | ||
1447 | + bool median_spectrum(double* m, unsigned char* mask, bool PROGRESS = false){ | ||
1448 | + if (header.interleave == envi_header::BSQ){ | ||
1449 | + if (header.data_type == envi_header::float32) | ||
1450 | + return ((bsq<float>*)file)->median_spectrum(m, mask, PROGRESS); | ||
1395 | else if (header.data_type == envi_header::float64) | 1451 | else if (header.data_type == envi_header::float64) |
1396 | - return ((bip<double>*)file)->avg_band(p, mask, PROGRESS); | 1452 | + return ((bsq<double>*)file)->median_spectrum(m, mask, PROGRESS); |
1397 | else{ | 1453 | else{ |
1398 | std::cout << "ERROR: unidentified data type" << std::endl; | 1454 | std::cout << "ERROR: unidentified data type" << std::endl; |
1399 | exit(1); | 1455 | exit(1); |
1400 | } | 1456 | } |
1401 | } | 1457 | } |
1458 | + else{ | ||
1459 | + std::cout<<"ERROR: median calculation is only supported for BSQ interleave types. Convert to process."<<std::endl; | ||
1460 | + exit(1); | ||
1461 | + } | ||
1402 | return false; | 1462 | return false; |
1403 | } | 1463 | } |
1404 | 1464 | ||
@@ -1407,16 +1467,16 @@ public: | @@ -1407,16 +1467,16 @@ public: | ||
1407 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix | 1467 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1408 | /// @param avg is a pointer to memory of size B that stores the average spectrum | 1468 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1409 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1469 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1410 | - bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){ | 1470 | + bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){ |
1411 | if (header.interleave == envi_header::BSQ){ | 1471 | if (header.interleave == envi_header::BSQ){ |
1412 | std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl; | 1472 | std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl; |
1413 | exit(1); | 1473 | exit(1); |
1414 | } | 1474 | } |
1415 | else if (header.interleave == envi_header::BIL){ | 1475 | else if (header.interleave == envi_header::BIL){ |
1416 | if (header.data_type == envi_header::float32) | 1476 | if (header.data_type == envi_header::float32) |
1417 | - return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS); | 1477 | + return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); |
1418 | else if (header.data_type == envi_header::float64) | 1478 | else if (header.data_type == envi_header::float64) |
1419 | - return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS); | 1479 | + return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); |
1420 | else{ | 1480 | else{ |
1421 | std::cout << "ERROR: unidentified data type" << std::endl; | 1481 | std::cout << "ERROR: unidentified data type" << std::endl; |
1422 | exit(1); | 1482 | exit(1); |
@@ -1424,9 +1484,9 @@ public: | @@ -1424,9 +1484,9 @@ public: | ||
1424 | } | 1484 | } |
1425 | else if (header.interleave == envi_header::BIP){ | 1485 | else if (header.interleave == envi_header::BIP){ |
1426 | if (header.data_type == envi_header::float32) | 1486 | if (header.data_type == envi_header::float32) |
1427 | - return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS); | 1487 | + return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); |
1428 | else if (header.data_type == envi_header::float64) | 1488 | else if (header.data_type == envi_header::float64) |
1429 | - return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS); | 1489 | + return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); |
1430 | else{ | 1490 | else{ |
1431 | std::cout << "ERROR: unidentified data type" << std::endl; | 1491 | std::cout << "ERROR: unidentified data type" << std::endl; |
1432 | exit(1); | 1492 | exit(1); |
@@ -1440,7 +1500,7 @@ public: | @@ -1440,7 +1500,7 @@ public: | ||
1440 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix | 1500 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1441 | /// @param avg is a pointer to memory of size B that stores the average spectrum | 1501 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1442 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1502 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1443 | - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){ | 1503 | + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){ |
1444 | if (header.interleave == envi_header::BSQ){ | 1504 | if (header.interleave == envi_header::BSQ){ |
1445 | std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl; | 1505 | std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl; |
1446 | exit(1); | 1506 | exit(1); |
@@ -1454,9 +1514,9 @@ public: | @@ -1454,9 +1514,9 @@ public: | ||
1454 | 1514 | ||
1455 | else if (header.interleave == envi_header::BIP){ | 1515 | else if (header.interleave == envi_header::BIP){ |
1456 | if (header.data_type == envi_header::float32) | 1516 | if (header.data_type == envi_header::float32) |
1457 | - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); | 1517 | + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS); |
1458 | else if (header.data_type == envi_header::float64) | 1518 | else if (header.data_type == envi_header::float64) |
1459 | - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); | 1519 | + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS); |
1460 | else{ | 1520 | else{ |
1461 | std::cout << "ERROR: unidentified data type" << std::endl; | 1521 | std::cout << "ERROR: unidentified data type" << std::endl; |
1462 | exit(1); | 1522 | exit(1); |
@@ -1524,6 +1584,41 @@ public: | @@ -1524,6 +1584,41 @@ public: | ||
1524 | return false; | 1584 | return false; |
1525 | } | 1585 | } |
1526 | 1586 | ||
1587 | +	/// Crop a subimage around each masked pixel and save them, stacked along y, as a new ENVI file (BSQ only) | |
1588 | + | |
1589 | +	/// @param outfile is the file name for the output image (the header is saved as outfile + ".hdr") | |
1590 | +	/// @param nx is the width of each subimage | |
1591 | +	/// @param ny is the height of each subimage | |
1592 | +	/// @param mask is a pointer to memory of size [X * Y]; each non-zero pixel marks one subimage | |
1593 | +	/// @param PROGRESS is passed on to the file object that extracts the subimages | |
1594 | +	void subimages(std::string outfile, size_t nx, size_t ny, unsigned char* mask, bool PROGRESS = false){ | |
1595 | +		size_t nnz = 0;													//initialize the number of subimages to zero | |
1596 | +		for(size_t i = 0; i < header.lines * header.samples; i++)		//for each pixel in the mask | |
1597 | +			if(mask[i]) nnz++;											//if the pixel is valid, add a subimage | |
1598 | + | |
1599 | +		//describe the output image in a copy of the current header | |
1600 | +		stim::envi_header new_header = header; | |
1601 | +		new_header.samples = nx;										//the output is one subimage wide | |
1602 | +		new_header.lines = nnz * ny;									//the subimages are concatenated along y | |
1603 | + | |
1604 | +		if (header.interleave == envi_header::BSQ){ | |
1605 | +			if (header.data_type == envi_header::float32) | |
1606 | +				((bsq<float>*)file)->subimages(outfile, nx, ny, mask, PROGRESS); | |
1607 | +			else if (header.data_type == envi_header::float64) | |
1608 | +				((bsq<double>*)file)->subimages(outfile, nx, ny, mask, PROGRESS); | |
1609 | +			else{ | |
1610 | +				std::cout << "ERROR: unidentified data type" << std::endl; | |
1611 | +				exit(1); | |
1612 | +			} | |
1613 | +		} | |
1614 | +		else if (header.interleave == envi_header::BIL || header.interleave == envi_header::BIP){ | |
1615 | +			//the data type is not the problem here: only the BSQ file class implements subimages() | |
1616 | +			std::cout << "ERROR: subimages are only supported for BSQ interleave types. Convert to process." << std::endl; | |
1617 | +			exit(1); | |
1618 | +		} | |
1619 | +		new_header.save(outfile + ".hdr");								//save the header for the output file | |
1620 | +	} | |
1621 | + | ||
1527 | /// Remove a list of bands from the ENVI file | 1622 | /// Remove a list of bands from the ENVI file |
1528 | 1623 | ||
1529 | /// @param outfile is the file name for the output hyperspectral image (with trimmed bands) | 1624 | /// @param outfile is the file name for the output hyperspectral image (with trimmed bands) |
@@ -1801,6 +1896,44 @@ public: | @@ -1801,6 +1896,44 @@ public: | ||
1801 | } | 1896 | } |
1802 | exit(1); | 1897 | exit(1); |
1803 | } | 1898 | } |
1899 | + | |
1900 | +	/// Compute the FFT of the spectra in a BIP file and save the requested range of FFT bands as a new ENVI file | |
1901 | + | |
1902 | +	/// @param outfile is the file name for the output image (the header is saved as outfile + ".hdr") | |
1903 | +	/// @param band_min is the lower bound of the range to save, in inverse wavelength units (clamped to zero) | |
1904 | +	/// @param band_max is the upper bound of the range to save, in inverse wavelength units (clamped to the FFT maximum) | |
1905 | +	/// @param samples is the number of samples used for the FFT (0 = number of bands); the remaining parameters are passed to bip::fft unchanged | |
1906 | +	void fft(std::string outfile, double band_min, double band_max, size_t samples = 0, void* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int cuda_device = 0){ | |
1907 | +		if(header.interleave != envi_header::BIP){ std::cout<<"ERROR: only BIP files supported for FFT"<<std::endl; exit(1); }	//fail before any output is written | |
1908 | +		if(header.wavelength.size() < 2){ std::cout<<"ERROR: at least two wavelengths are required to calculate the FFT band spacing"<<std::endl; exit(1); } | |
1909 | +		if(samples == 0) samples = header.bands; | |
1910 | +		double delta = header.wavelength[1] - header.wavelength[0];		//calculate spacing in the current domain | |
1911 | +		double span = samples * delta;									//calculate the span in the current domain | |
1912 | +		double fft_delta = 1.0 / span;									//calculate the spacing in the FFT domain | |
1913 | +		double fft_max = fft_delta * samples/2;							//calculate the maximum range of the FFT | |
1914 | + | |
1915 | +		if(band_min < 0) band_min = 0;									//a negative bound would wrap around when converted to an index | |
1916 | +		if(band_max > fft_max) band_max = fft_max;						//the user gave a band outside of the FFT range, reset the band to the maximum available | |
1917 | +		size_t start_i = (size_t)std::ceil(band_min / fft_delta);		//calculate the first band to store | |
1918 | +		size_t end_i = (size_t)std::floor(band_max / fft_delta);		//calculate the last band to store | |
1919 | +		if(band_min > band_max || end_i < start_i){ std::cout<<"ERROR: the requested FFT band range is empty"<<std::endl; exit(1); }	//otherwise the band count below would underflow | |
1920 | +		size_t size_i = end_i - start_i + 1;							//calculate the number of bands to store | |
1921 | + | |
1922 | +		envi_header new_header = header; | |
1923 | +		new_header.bands = size_i; | |
1924 | +		new_header.set_wavelengths(start_i * fft_delta, fft_delta); | |
1925 | +		new_header.wavelength_units = "inv_" + header.wavelength_units; | |
1926 | +		new_header.save(outfile + ".hdr"); | |
1927 | + | |
1928 | +		if (header.data_type == envi_header::float32) | |
1929 | +			((bip<float>*)file)->fft(outfile, start_i, end_i, samples, (float*)ratio, rx, ry, PROGRESS, cuda_device); | |
1930 | +		else if (header.data_type == envi_header::float64) | |
1931 | +			((bip<double>*)file)->fft(outfile, start_i, end_i, samples, (double*)ratio, rx, ry, PROGRESS, cuda_device); | |
1932 | +		else{ | |
1933 | +			std::cout << "ERROR: unidentified data type" << std::endl; | |
1934 | +			exit(1); | |
1935 | +		} | |
1936 | +	} | |
1804 | }; //end ENVI | 1937 | }; //end ENVI |
1805 | 1938 | ||
1806 | } //end namespace rts | 1939 | } //end namespace rts |
stim/envi/envi_header.h
@@ -78,6 +78,14 @@ struct envi_header | @@ -78,6 +78,14 @@ struct envi_header | ||
78 | load(name); | 78 | load(name); |
79 | } | 79 | } |
80 | 80 | ||
81 | +	//fills the wavelength vector with one value per band, uniformly spaced: start, start + step, start + 2 * step, ... | |
82 | +	void set_wavelengths(double start, double step){ | |
83 | +		wavelength.resize(bands);							//one wavelength per band | |
84 | +		for(size_t k = 0; k < wavelength.size(); ++k){ | |
85 | +			wavelength[k] = start + k * step; | |
86 | +		} | |
87 | +	} | |
88 | + | ||
81 | std::string trim(std::string line){ | 89 | std::string trim(std::string line){ |
82 | 90 | ||
83 | if(line.length() == 0) | 91 | if(line.length() == 0) |
@@ -417,8 +425,13 @@ struct envi_header | @@ -417,8 +425,13 @@ struct envi_header | ||
417 | default: | 425 | default: |
418 | return 0; | 426 | return 0; |
419 | } | 427 | } |
428 | + } | ||
420 | 429 | ||
430 | +	//return the number of bytes that SHOULD be in the data file: samples * lines * bands values of valsize() bytes each, plus the header offset | |
431 | +	size_t data_bytes(){ | |
432 | +		return samples * lines * bands * valsize() + header_offset;	//NOTE(review): the product could overflow if these members are narrower than size_t - confirm their types | |
421 | 	} | 433 | 	} |
434 | + | ||
422 | 435 | ||
423 | /// Convert an interleave type to a string | 436 | /// Convert an interleave type to a string |
424 | static std::string interleave_str(interleaveType t){ | 437 | static std::string interleave_str(interleaveType t){ |
stim/envi/hsi.h
@@ -142,7 +142,7 @@ public: | @@ -142,7 +142,7 @@ public: | ||
142 | void mask_finite(unsigned char* out_mask, unsigned char* mask, bool PROGRESS = false){ | 142 | void mask_finite(unsigned char* out_mask, unsigned char* mask, bool PROGRESS = false){ |
143 | size_t XY = X() * Y(); | 143 | size_t XY = X() * Y(); |
144 | if(mask == NULL) //if no mask is provided | 144 | if(mask == NULL) //if no mask is provided |
145 | - memset(mask, 255, XY * sizeof(unsigned char)); //initialize the mask to 255 | 145 | + memset(out_mask, 255, XY * sizeof(unsigned char)); //initialize the mask to 255 |
146 | else //if a mask is provided | 146 | else //if a mask is provided |
147 | memcpy(out_mask, mask, XY * sizeof(unsigned char)); //initialize the current mask to that one | 147 | memcpy(out_mask, mask, XY * sizeof(unsigned char)); //initialize the current mask to that one |
148 | T* page = (T*)malloc(R[0] * R[1] * sizeof(T)); //allocate space for a page of data | 148 | T* page = (T*)malloc(R[0] * R[1] * sizeof(T)); //allocate space for a page of data |
stim/gl/error.h
stim/gl/gl_spider.h
@@ -479,7 +479,7 @@ class gl_spider // : public virtual gl_texture<T> | @@ -479,7 +479,7 @@ class gl_spider // : public virtual gl_texture<T> | ||
479 | glEndList(); ///finilize the display list. | 479 | glEndList(); ///finilize the display list. |
480 | #ifdef DEBUG | 480 | #ifdef DEBUG |
481 | for(int i = 0; i < numSamplesPos; i++) | 481 | for(int i = 0; i < numSamplesPos; i++) |
482 | - std::cout << pV[i] << std::endl; | 482 | + std::cout << pV[i].str() << std::endl; |
483 | #endif | 483 | #endif |
484 | } | 484 | } |
485 | 485 | ||
@@ -1151,8 +1151,8 @@ class gl_spider // : public virtual gl_texture<T> | @@ -1151,8 +1151,8 @@ class gl_spider // : public virtual gl_texture<T> | ||
1151 | out[3] = temp[2]; | 1151 | out[3] = temp[2]; |
1152 | } | 1152 | } |
1153 | #ifdef DEBUG | 1153 | #ifdef DEBUG |
1154 | -// std::cout << "out is " << out << std::endl; | ||
1155 | -// std::cout << "when rotating from " << from << " to " << dir << std::endl; | 1154 | + std::cout << "out is " << out.str() << std::endl; |
1155 | + std::cout << "when rotating from " << from.str() << " to " << dir.str() << std::endl; | ||
1156 | #endif | 1156 | #endif |
1157 | return out; | 1157 | return out; |
1158 | } | 1158 | } |
@@ -1545,7 +1545,7 @@ class gl_spider // : public virtual gl_texture<T> | @@ -1545,7 +1545,7 @@ class gl_spider // : public virtual gl_texture<T> | ||
1545 | setMagnitude(curSeedMag); | 1545 | setMagnitude(curSeedMag); |
1546 | 1546 | ||
1547 | #ifdef DEBUG | 1547 | #ifdef DEBUG |
1548 | - std::cout << "The new seed " << curSeed << curSeedVec << curSeedMag << std::endl; | 1548 | + std::cout << "The new seed " << curSeed.str() << curSeedVec.str() << curSeedMag << std::endl; |
1549 | #endif | 1549 | #endif |
1550 | 1550 | ||
1551 | // Bind(direction_texID, direction_buffID, numSamples, n_pixels); | 1551 | // Bind(direction_texID, direction_buffID, numSamples, n_pixels); |
stim/grids/image_stack.h
@@ -139,7 +139,7 @@ public: | @@ -139,7 +139,7 @@ public: | ||
139 | /// @param depth, number of pixels in depth. | 139 | /// @param depth, number of pixels in depth. |
140 | void init(int channels, int width, int height, int depth) | 140 | void init(int channels, int width, int height, int depth) |
141 | { | 141 | { |
142 | - R.resize(4); | 142 | + //R.resize(4); |
143 | R[0] = channels; | 143 | R[0] = channels; |
144 | R[1] = width; | 144 | R[1] = width; |
145 | R[2] = height; | 145 | R[2] = height; |
stim/image/image.h
@@ -10,6 +10,7 @@ | @@ -10,6 +10,7 @@ | ||
10 | #include <limits> | 10 | #include <limits> |
11 | #include <typeinfo> | 11 | #include <typeinfo> |
12 | #include <fstream> | 12 | #include <fstream> |
13 | +#include <cstring> | ||
13 | 14 | ||
14 | namespace stim{ | 15 | namespace stim{ |
15 | /// This static class provides the STIM interface for loading, saving, and storing 2D images. | 16 | /// This static class provides the STIM interface for loading, saving, and storing 2D images. |
@@ -74,18 +75,7 @@ class image{ | @@ -74,18 +75,7 @@ class image{ | ||
74 | #endif | 75 | #endif |
75 | /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) | 76 | /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) |
76 | T white(){ | 77 | T white(){ |
77 | - | ||
78 | - if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX; | ||
79 | - if(typeid(T) == typeid(unsigned short)) return SHRT_MAX; | ||
80 | - if(typeid(T) == typeid(unsigned)) return UINT_MAX; | ||
81 | - if(typeid(T) == typeid(unsigned long)) return ULONG_MAX; | ||
82 | - if(typeid(T) == typeid(unsigned long long)) return ULLONG_MAX; | ||
83 | - if(typeid(T) == typeid(float)) return 1.0f; | ||
84 | - if(typeid(T) == typeid(double)) return 1.0; | ||
85 | - | ||
86 | - std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl; | ||
87 | - exit(1); | ||
88 | - | 78 | + return std::numeric_limits<T>::max(); |
89 | } | 79 | } |
90 | 80 | ||
91 | 81 |
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
1 | +#ifndef RTS_BESSEL_H | ||
2 | +#define RTS_BESSEL_H | ||
3 | + | ||
4 | +#define _USE_MATH_DEFINES | ||
5 | +#include <math.h> | ||
6 | +#include "../math/complex.h" | ||
7 | +#define eps 1e-15 | ||
8 | +#define el 0.5772156649015329 | ||
9 | + | ||
10 | + | ||
11 | +namespace stim{ | ||
12 | + | ||
13 | +static complex<double> cii(0.0,1.0); | ||
14 | +static complex<double> cone(1.0,0.0); | ||
15 | +static complex<double> czero(0.0,0.0); | ||
16 | + | ||
// Evaluates the gamma function at x.
//
// Returns 1e308 as an overflow flag when x > 171 and when x is zero or a
// negative integer. Positive integers are handled with a factorial product;
// every other argument goes through a 25-term polynomial evaluated at the
// fractional part, followed by a shift back to x and, for negative x, a
// reflection step.
template< typename P >
P gamma(P x)
{
    // polynomial coefficients; the result is the reciprocal of (polynomial * z)
    static P coeff[] = {
        1.0,
        0.5772156649015329,
       -0.6558780715202538,
       -0.420026350340952e-1,
        0.1665386113822915,
       -0.421977345555443e-1,
       -0.9621971527877e-2,
        0.7218943246663e-2,
       -0.11651675918591e-2,
       -0.2152416741149e-3,
        0.1280502823882e-3,
       -0.201348547807e-4,
       -0.12504934821e-5,
        0.1133027232e-5,
       -0.2056338417e-6,
        0.6116095e-8,
        0.50020075e-8,
       -0.11812746e-8,
        0.1043427e-9,
        0.77823e-11,
       -0.36968e-11,
        0.51e-12,
       -0.206e-13,
       -0.54e-14,
        0.14e-14};

    if (x > 171.0) return 1e308;        // This value is an overflow flag.

    // integer argument: factorial for positive x, flag value otherwise
    if (x == (int)x) {
        if (!(x > 0.0)) return 1e308;
        P product = 1.0;
        int i = 2;
        while (i < x) {
            product *= i;
            i++;
        }
        return product;
    }

    // non-integer argument
    const bool beyondUnit = fabs(x) > 1.0;
    P z = x;
    P shift = 1.0;                      // only consumed when beyondUnit is true
    if (beyondUnit) {
        z = fabs(x);
        const int whole = (int)z;
        for (int k = 1; k <= whole; k++) {
            shift *= (z-k);
        }
        z -= whole;                     // keep only the fractional part
    }

    // Horner evaluation from the highest coefficient down to coeff[0]
    P poly = coeff[24];
    for (int k = 24; k-- > 0; ) {
        poly = poly*z+coeff[k];
    }

    P result = 1.0/(poly*z);
    if (beyondUnit) {
        result *= shift;
        if (x < 0.0) {
            result = -M_PI/(x*result*sin(M_PI*x));
        }
    }
    return result;
}
87 | + | ||
88 | +template<typename P> | ||
89 | +int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1, | ||
90 | + P &j0p,P &j1p,P &y0p,P &y1p) | ||
91 | +{ | ||
92 | + P x2,r,ec,w0,w1,r0,r1,cs0,cs1; | ||
93 | + P cu,p0,q0,p1,q1,t1,t2; | ||
94 | + int k,kz; | ||
95 | + static P a[] = { | ||
96 | + -7.03125e-2, | ||
97 | + 0.112152099609375, | ||
98 | + -0.5725014209747314, | ||
99 | + 6.074042001273483, | ||
100 | + -1.100171402692467e2, | ||
101 | + 3.038090510922384e3, | ||
102 | + -1.188384262567832e5, | ||
103 | + 6.252951493434797e6, | ||
104 | + -4.259392165047669e8, | ||
105 | + 3.646840080706556e10, | ||
106 | + -3.833534661393944e12, | ||
107 | + 4.854014686852901e14, | ||
108 | + -7.286857349377656e16, | ||
109 | + 1.279721941975975e19}; | ||
110 | + static P b[] = { | ||
111 | + 7.32421875e-2, | ||
112 | + -0.2271080017089844, | ||
113 | + 1.727727502584457, | ||
114 | + -2.438052969955606e1, | ||
115 | + 5.513358961220206e2, | ||
116 | + -1.825775547429318e4, | ||
117 | + 8.328593040162893e5, | ||
118 | + -5.006958953198893e7, | ||
119 | + 3.836255180230433e9, | ||
120 | + -3.649010818849833e11, | ||
121 | + 4.218971570284096e13, | ||
122 | + -5.827244631566907e15, | ||
123 | + 9.476288099260110e17, | ||
124 | + -1.792162323051699e20}; | ||
125 | + static P a1[] = { | ||
126 | + 0.1171875, | ||
127 | + -0.1441955566406250, | ||
128 | + 0.6765925884246826, | ||
129 | + -6.883914268109947, | ||
130 | + 1.215978918765359e2, | ||
131 | + -3.302272294480852e3, | ||
132 | + 1.276412726461746e5, | ||
133 | + -6.656367718817688e6, | ||
134 | + 4.502786003050393e8, | ||
135 | + -3.833857520742790e10, | ||
136 | + 4.011838599133198e12, | ||
137 | + -5.060568503314727e14, | ||
138 | + 7.572616461117958e16, | ||
139 | + -1.326257285320556e19}; | ||
140 | + static P b1[] = { | ||
141 | + -0.1025390625, | ||
142 | + 0.2775764465332031, | ||
143 | + -1.993531733751297, | ||
144 | + 2.724882731126854e1, | ||
145 | + -6.038440767050702e2, | ||
146 | + 1.971837591223663e4, | ||
147 | + -8.902978767070678e5, | ||
148 | + 5.310411010968522e7, | ||
149 | + -4.043620325107754e9, | ||
150 | + 3.827011346598605e11, | ||
151 | + -4.406481417852278e13, | ||
152 | + 6.065091351222699e15, | ||
153 | + -9.833883876590679e17, | ||
154 | + 1.855045211579828e20}; | ||
155 | + | ||
156 | + if (x < 0.0) return 1; | ||
157 | + if (x == 0.0) { | ||
158 | + j0 = 1.0; | ||
159 | + j1 = 0.0; | ||
160 | + y0 = -1e308; | ||
161 | + y1 = -1e308; | ||
162 | + j0p = 0.0; | ||
163 | + j1p = 0.5; | ||
164 | + y0p = 1e308; | ||
165 | + y1p = 1e308; | ||
166 | + return 0; | ||
167 | + } | ||
168 | + x2 = x*x; | ||
169 | + if (x <= 12.0) { | ||
170 | + j0 = 1.0; | ||
171 | + r = 1.0; | ||
172 | + for (k=1;k<=30;k++) { | ||
173 | + r *= -0.25*x2/(k*k); | ||
174 | + j0 += r; | ||
175 | + if (fabs(r) < fabs(j0)*1e-15) break; | ||
176 | + } | ||
177 | + j1 = 1.0; | ||
178 | + r = 1.0; | ||
179 | + for (k=1;k<=30;k++) { | ||
180 | + r *= -0.25*x2/(k*(k+1)); | ||
181 | + j1 += r; | ||
182 | + if (fabs(r) < fabs(j1)*1e-15) break; | ||
183 | + } | ||
184 | + j1 *= 0.5*x; | ||
185 | + ec = log(0.5*x)+el; | ||
186 | + cs0 = 0.0; | ||
187 | + w0 = 0.0; | ||
188 | + r0 = 1.0; | ||
189 | + for (k=1;k<=30;k++) { | ||
190 | + w0 += 1.0/k; | ||
191 | + r0 *= -0.25*x2/(k*k); | ||
192 | + r = r0 * w0; | ||
193 | + cs0 += r; | ||
194 | + if (fabs(r) < fabs(cs0)*1e-15) break; | ||
195 | + } | ||
196 | + y0 = M_2_PI*(ec*j0-cs0); | ||
197 | + cs1 = 1.0; | ||
198 | + w1 = 0.0; | ||
199 | + r1 = 1.0; | ||
200 | + for (k=1;k<=30;k++) { | ||
201 | + w1 += 1.0/k; | ||
202 | + r1 *= -0.25*x2/(k*(k+1)); | ||
203 | + r = r1*(2.0*w1+1.0/(k+1)); | ||
204 | + cs1 += r; | ||
205 | + if (fabs(r) < fabs(cs1)*1e-15) break; | ||
206 | + } | ||
207 | + y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1); | ||
208 | + } | ||
209 | + else { | ||
210 | + if (x >= 50.0) kz = 8; // Can be changed to 10 | ||
211 | + else if (x >= 35.0) kz = 10; // " " 12 | ||
212 | + else kz = 12; // " " 14 | ||
213 | + t1 = x-M_PI_4; | ||
214 | + p0 = 1.0; | ||
215 | + q0 = -0.125/x; | ||
216 | + for (k=0;k<kz;k++) { | ||
217 | + p0 += a[k]*pow(x,-2*k-2); | ||
218 | + q0 += b[k]*pow(x,-2*k-3); | ||
219 | + } | ||
220 | + cu = sqrt(M_2_PI/x); | ||
221 | + j0 = cu*(p0*cos(t1)-q0*sin(t1)); | ||
222 | + y0 = cu*(p0*sin(t1)+q0*cos(t1)); | ||
223 | + t2 = x-0.75*M_PI; | ||
224 | + p1 = 1.0; | ||
225 | + q1 = 0.375/x; | ||
226 | + for (k=0;k<kz;k++) { | ||
227 | + p1 += a1[k]*pow(x,-2*k-2); | ||
228 | + q1 += b1[k]*pow(x,-2*k-3); | ||
229 | + } | ||
230 | + j1 = cu*(p1*cos(t2)-q1*sin(t2)); | ||
231 | + y1 = cu*(p1*sin(t2)+q1*cos(t2)); | ||
232 | + } | ||
233 | + j0p = -j1; | ||
234 | + j1p = j0-j1/x; | ||
235 | + y0p = -y1; | ||
236 | + y1p = y0-y1/x; | ||
237 | + return 0; | ||
238 | +} | ||
239 | +// | ||
240 | +// INPUT: | ||
241 | +// double x -- argument of Bessel function | ||
242 | +// | ||
243 | +// OUTPUT: | ||
244 | +// double j0 -- Bessel function of 1st kind, 0th order | ||
245 | +// double j1 -- Bessel function of 1st kind, 1st order | ||
246 | +// double y0 -- Bessel function of 2nd kind, 0th order | ||
247 | +// double y1 -- Bessel function of 2nd kind, 1st order | ||
248 | +// double j0p -- derivative of Bessel function of 1st kind, 0th order | ||
249 | +// double j1p -- derivative of Bessel function of 1st kind, 1st order | ||
250 | +// double y0p -- derivative of Bessel function of 2nd kind, 0th order | ||
251 | +// double y1p -- derivative of Bessel function of 2nd kind, 1st order | ||
252 | +// | ||
253 | +// RETURN: | ||
254 | +// int error code: 0 = OK, 1 = error | ||
255 | +// | ||
256 | +// This algorithm computes the functions using polynomial approximations. | ||
257 | +// | ||
// Polynomial-approximation variant of the order 0 / order 1 Bessel routine.
//
// Writes J0, J1, Y0, Y1 at x into j0, j1, y0, y1 and their derivatives into
// j0p, j1p, y0p, y1p. Returns 1 (outputs untouched) when x < 0, otherwise 0.
// x == 0 yields fixed values with +/-1e308 standing in for the singular terms.
// Arguments up to 4 use polynomials in (x/4)^2; larger arguments use
// polynomials in (4/x)^2 combined with a cosine/sine pair.
template<typename P>
int bessjy01b(P x,P &j0,P &j1,P &y0,P &y1,
    P &j0p,P &j1p,P &y0p,P &y1p)
{
    if (x < 0.0) return 1;

    if (x == 0.0) {
        j0 = 1.0;
        j1 = 0.0;
        y0 = -1e308;
        y1 = -1e308;
        j0p = 0.0;
        j1p = 0.5;
        y0p = 1e308;
        y1p = 1e308;
        return 0;
    }

    if (x <= 4.0) {
        const P u = x/4.0;
        const P u2 = u*u;

        j0 = ((((((-0.5014415e-3*u2+0.76771853e-2)*u2-0.0709253492)*u2+
            0.4443584263)*u2-1.7777560599)*u2+3.9999973021)*u2
            -3.9999998721)*u2+1.0;
        j1 = u*(((((((-0.1289769e-3*u2+0.22069155e-2)*u2-0.0236616773)*u2+
            0.1777582922)*u2-0.8888839649)*u2+2.6666660544)*u2-
            3.999999971)*u2+1.9999999998);

        // y0 and y1 are a logarithmic term times j0/j1 plus a polynomial correction
        P corr = (((((((-0.567433e-4*u2+0.859977e-3)*u2-0.94855882e-2)*u2+
            0.0772975809)*u2-0.4261737419)*u2+1.4216421221)*u2-
            2.3498519931)*u2+1.0766115157)*u2+0.3674669052;
        y0 = M_2_PI*log(0.5*x)*j0+corr;
        corr = (((((((0.6535773e-3*u2-0.0108175626)*u2+0.107657607)*u2-
            0.7268945577)*u2+3.1261399273)*u2-7.3980241381)*u2+
            6.8529236342)*u2+0.3932562018)*u2-0.6366197726;
        y1 = M_2_PI*log(0.5*x)*j1+corr/x;
    }
    else {
        const P u = 4.0/x;
        const P u2 = u*u;
        const P amp = sqrt(M_2_PI/x);

        // order 0
        const P p0 = ((((-0.9285e-5*u2+0.43506e-4)*u2-0.122226e-3)*u2+
            0.434725e-3)*u2-0.4394275e-2)*u2+0.999999997;
        const P q0 = u*(((((0.8099e-5*u2-0.35614e-4)*u2+0.85844e-4)*u2-
            0.218024e-3)*u2+0.1144106e-2)*u2-0.031249995);
        const P phase0 = x-M_PI_4;
        j0 = amp*(p0*cos(phase0)-q0*sin(phase0));
        y0 = amp*(p0*sin(phase0)+q0*cos(phase0));

        // order 1
        const P p1 = ((((0.10632e-4*u2-0.50363e-4)*u2+0.145575e-3)*u2
            -0.559487e-3)*u2+0.7323931e-2)*u2+1.000000004;
        const P q1 = u*(((((-0.9173e-5*u2+0.40658e-4)*u2-0.99941e-4)*u2
            +0.266891e-3)*u2-0.1601836e-2)*u2+0.093749994);
        const P phase1 = x-0.75*M_PI;
        j1 = amp*(p1*cos(phase1)-q1*sin(phase1));
        y1 = amp*(p1*sin(phase1)+q1*cos(phase1));
    }

    // derivatives follow from the function values
    j0p = -j1;
    j1p = j0-j1/x;
    y0p = -y1;
    y1p = y0-y1/x;
    return 0;
}
// Searches for an integer order nn at which the expression
//     0.5*log10(6.28*nn) - nn*log10(1.36*|x|/nn) - mp
// crosses zero, using at most 20 secant steps, and returns the last estimate.
// The callers in this file use the result as the starting order of a
// downward recurrence.
template<typename P>
int msta1(P x,int mp)
{
    const P mag = fabs(x);

    // two initial guesses, five orders apart
    int lo = (int)(1.1*mag)+1;
    P fLo = 0.5*log10(6.28*lo)-lo*log10(1.36*mag/lo)-mp;
    int hi = lo+5;
    P fHi = 0.5*log10(6.28*hi)-hi*log10(1.36*mag/hi)-mp;

    int nn;
    int step = 0;
    while (step < 20) {
        nn = (int)(hi-(hi-lo)/(1.0-fLo/fHi));
        if (nn == hi) break;            // the integer estimate stopped moving
        const P fNext = 0.5*log10(6.28*nn)-nn*log10(1.36*mag/nn)-mp;
        lo = hi;
        fLo = fHi;
        hi = nn;
        fHi = fNext;
        ++step;
    }
    return nn;
}
// Companion of msta1: runs the same secant search (at most 20 steps) on
//     0.5*log10(6.28*nn) - nn*log10(1.36*|x|/nn) - target
// where the target and the first guess depend on the requested order n and on
// mp. Returns the final estimate plus 10.
template<typename P>
int msta2(P x,int n,int mp)
{
    const P mag = fabs(x);
    const P halfMp = 0.5*mp;
    const P atN = 0.5*log10(6.28*n)-n*log10(1.36*mag/n);

    // choose the target value and the first guess
    P target;
    int lo;
    if (atN <= halfMp) {
        target = mp;
        lo = (int)(1.1*mag);
        if (lo < 1) lo = 1;
    }
    else {
        target = halfMp+atN;
        lo = n;
    }

    P fLo = 0.5*log10(6.28*lo)-lo*log10(1.36*mag/lo)-target;
    int hi = lo+5;
    P fHi = 0.5*log10(6.28*hi)-hi*log10(1.36*mag/hi)-target;

    int nn;
    int step = 0;
    while (step < 20) {
        nn = (int)(hi-(hi-lo)/(1.0-fLo/fHi));
        if (nn == hi) break;            // the integer estimate stopped moving
        const P fNext = 0.5*log10(6.28*nn)-nn*log10(1.36*mag/nn)-target;
        lo = hi;
        fLo = fHi;
        hi = nn;
        fHi = fNext;
        ++step;
    }
    return nn+10;
}
372 | +// | ||
373 | +// INPUT: | ||
374 | +// double x -- argument of Bessel function of 1st and 2nd kind. | ||
375 | +// int n -- order | ||
376 | +// | ||
377 | +// OUTPUT: | |
378 | +// | ||
379 | +// int nm -- highest order actually computed (nm <= n) | ||
380 | +// double jn[] -- Bessel function of 1st kind, orders from 0 to nm | ||
381 | +// double yn[] -- Bessel function of 2nd kind, orders from 0 to nm | ||
382 | +// double j'n[]-- derivative of Bessel function of 1st kind, | ||
383 | +// orders from 0 to nm | ||
384 | +// double y'n[]-- derivative of Bessel function of 2nd kind, | ||
385 | +// orders from 0 to nm | ||
386 | +// | ||
387 | +// Computes Bessel functions of all order up to 'n' using recurrence | ||
388 | +// relations. If 'nm' < 'n' only 'nm' orders are returned. | ||
389 | +// | ||
// Computes Bessel functions of the first and second kind, and their
// derivatives, for the integer orders 0..n at argument x.
//
//  n    -- highest requested order (n >= 0)
//  x    -- argument (x >= 0)
//  nm   -- receives the highest order actually computed (nm <= n)
//  jn   -- receives J_k(x),  k = 0..nm
//  yn   -- receives Y_k(x),  k = 0..nm
//  jnp  -- receives J'_k(x), k = 0..nm
//  ynp  -- receives Y'_k(x), k = 0..nm
//
// Index 1 of every array is written even when n == 0, so each array must hold
// at least two elements.
//
// Returns 1 when x < 0 or n < 0 (arrays untouched, nm set to n), otherwise 0.
template<typename P>
int bessjyna(int n,P x,int &nm,P *jn,P *yn,
    P *jnp,P *ynp)
{
    P bj0,bj1,f,f0,f1,f2,cs;
    int i,k,m;

    nm = n;
    if ((x < 0.0) || (n < 0)) return 1;

    // fixed values at (numerically) zero argument
    if (x < 1e-15) {
        for (i=0;i<=n;i++) {
            jn[i] = 0.0;
            yn[i] = -1e308;
            jnp[i] = 0.0;
            ynp[i] = 1e308;
        }
        jn[0] = 1.0;
        jnp[1] = 0.5;
        return 0;
    }

    // orders 0 and 1 (x >= 0 is already guaranteed, so the call cannot fail)
    bessjy01a(x,jn[0],jn[1],yn[0],yn[1],jnp[0],jnp[1],ynp[0],ynp[1]);
    if (n < 2) return 0;

    bj0 = jn[0];
    bj1 = jn[1];
    // The whole product 0.9*x must be truncated. The previous "(int)0.9*x"
    // cast 0.9 to 0 first, which turned this test into "n < 0" and made the
    // forward recurrence unreachable.
    if (n < (int)(0.9*x)) {
        // forward recurrence for J_k
        for (k=2;k<=n;k++) {
            jn[k] = 2.0*(k-1.0)*bj1/x-bj0;
            bj0 = bj1;
            bj1 = jn[k];
        }
    }
    else {
        // backward recurrence from a starting order picked by msta1/msta2,
        // rescaled afterwards with whichever of J0/J1 is larger in magnitude
        m = msta1(x,200);
        if (m < n) nm = m;
        else m = msta2(x,n,15);
        f2 = 0.0;
        f1 = 1.0e-100;
        for (k=m;k>=0;k--) {
            f = 2.0*(k+1.0)/x*f1-f2;
            if (k <= nm) jn[k] = f;
            f2 = f1;
            f1 = f;
        }
        if (fabs(bj0) > fabs(bj1)) cs = bj0/f;
        else cs = bj1/f2;
        for (k=0;k<=nm;k++) {
            jn[k] *= cs;
        }
    }

    // derivatives of J_k
    for (k=2;k<=nm;k++) {
        jnp[k] = jn[k-1]-k*jn[k]/x;
    }

    // forward recurrence for Y_k
    f0 = yn[0];
    f1 = yn[1];
    for (k=2;k<=nm;k++) {
        f = 2.0*(k-1.0)*f1/x-f0;
        yn[k] = f;
        f0 = f1;
        f1 = f;
    }

    // derivatives of Y_k
    for (k=2;k<=nm;k++) {
        ynp[k] = yn[k-1]-k*yn[k]/x;
    }
    return 0;
}
455 | +// | ||
456 | +// Same input and output conventions as above. Different recurrence | ||
457 | +// relations used for 'x' < 300. | ||
458 | +// | ||
// Computes Bessel functions of the first and second kind, and their
// derivatives, for the integer orders 0..n at argument x. Inputs and outputs
// follow the same layout as bessjyna.
//
// When x <= 300 or n > (int)(0.9*x), J is built by a backward recurrence that
// is normalised with a sum accumulated during the sweep, and Y0/Y1 come from
// two further sums accumulated in the same sweep. Otherwise J0, J1, Y0, Y1
// come from a four-term expansion in inverse powers of x and J is continued
// by forward recurrence. Y always uses forward recurrence.
//
// Returns 1 when x < 0 or n < 0, otherwise 0. nm receives the highest order
// computed; in the backward-recurrence branch it is raised to 1 when n == 0.
template<typename P>
int bessjynb(int n,P x,int &nm,P *jn,P *yn,
    P *jnp,P *ynp)
{
    // coefficient tables for the x > 300 branch
    static P p0Coef[] = {
        -0.7031250000000000e-1,
         0.1121520996093750,
        -0.5725014209747314,
         6.074042001273483};
    static P q0Coef[] = {
         0.7324218750000000e-1,
        -0.2271080017089844,
         1.727727502584457,
        -2.438052969955606e1};
    static P p1Coef[] = {
         0.1171875,
        -0.1441955566406250,
         0.6765925884246826,
        -6.883914268109947};
    static P q1Coef[] = {
        -0.1025390625,
         0.2775764465332031,
        -1.993531733751297,
         2.724882731126854e1};

    nm = n;
    if ((x < 0.0) || (n < 0)) return 1;

    // fixed values at (numerically) zero argument
    if (x < 1e-15) {
        for (int i = 0; i <= n; ++i) {
            jn[i] = 0.0;
            yn[i] = -1e308;
            jnp[i] = 0.0;
            ynp[i] = 1e308;
        }
        jn[0] = 1.0;
        jnp[1] = 0.5;
        return 0;
    }

    P yLow, yHigh;      // seeds (orders 0 and 1) for the Y recurrence below

    if (x <= 300.0 || n > (int)(0.9*x)) {
        if (n == 0) nm = 1;
        int start = msta1(x,200);
        if (start < nm) nm = start;
        else start = msta2(x,nm,15);

        P evenSum = 0.0;
        P su = 0.0;
        P sv = 0.0;
        P ahead = 0.0;
        P current = 1.0e-100;
        P f;
        for (int k = start; k >= 0; --k) {
            f = 2.0*(k+1.0)/x*current - ahead;
            if (k <= nm) jn[k] = f;
            // +1 when bit 1 of k is clear, -1 when it is set; stands in for pow(-1,k>>1)
            const int sign = (-1)*((k & 2)-1);
            if ((k % 2 == 0) && (k != 0)) {
                evenSum += 2.0*f;
                su += sign*f/(P)k;
            }
            else if (k > 1) {
                sv += sign*(P)k*f/(k*k-1.0);
            }
            ahead = current;
            current = f;
        }

        // normalise the J sequence
        const P norm = evenSum+f;
        for (int k = 0; k <= nm; ++k) {
            jn[k] /= norm;
        }

        const P logTerm = log(0.5*x) +0.5772156649015329;
        yLow = M_2_PI*(logTerm*jn[0]-4.0*su/norm);
        yn[0] = yLow;
        yHigh = M_2_PI*((logTerm-1.0)*jn[1]-jn[0]/x-4.0*sv/norm);
        yn[1] = yHigh;
    }
    else {
        // expansion in inverse powers of x for orders 0 and 1
        const P phase0 = x-M_PI_4;
        P p0 = 1.0;
        P q0 = -0.125/x;
        for (int k = 0; k < 4; ++k) {
            p0 += p0Coef[k]*pow(x,-2*k-2);
            q0 += q0Coef[k]*pow(x,-2*k-3);
        }
        const P amp = sqrt(M_2_PI/x);
        P jLow = amp*(p0*cos(phase0)-q0*sin(phase0));
        yLow = amp*(p0*sin(phase0)+q0*cos(phase0));
        jn[0] = jLow;
        yn[0] = yLow;

        const P phase1 = x-0.75*M_PI;
        P p1 = 1.0;
        P q1 = 0.375/x;
        for (int k = 0; k < 4; ++k) {
            p1 += p1Coef[k]*pow(x,-2*k-2);
            q1 += q1Coef[k]*pow(x,-2*k-3);
        }
        P jHigh = amp*(p1*cos(phase1)-q1*sin(phase1));
        yHigh = amp*(p1*sin(phase1)+q1*cos(phase1));
        jn[1] = jHigh;
        yn[1] = yHigh;

        // forward recurrence for the remaining J orders
        for (int k = 2; k <= nm; ++k) {
            const P jNext = 2.0*(k-1.0)*jHigh/x-jLow;
            jn[k] = jNext;
            jLow = jHigh;
            jHigh = jNext;
        }
    }

    // derivatives of J
    jnp[0] = -jn[1];
    for (int k = 1; k <= nm; ++k) {
        jnp[k] = jn[k-1]-k*jn[k]/x;
    }

    // forward recurrence for Y
    for (int k = 2; k <= nm; ++k) {
        const P yNext = 2.0*(k-1.0)*yHigh/x-yLow;
        yn[k] = yNext;
        yLow = yHigh;
        yHigh = yNext;
    }

    // derivatives of Y
    ynp[0] = -yn[1];
    for (int k = 1; k <= nm; ++k) {
        ynp[k] = yn[k-1]-k*yn[k]/x;
    }
    return 0;

}
583 | + | ||
584 | +// The following routine computes Bessel Jv(x) and Yv(x) for | ||
585 | +// arbitrary positive order (v). For negative order, use: | ||
586 | +// | ||
587 | +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi) | ||
588 | +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi) | ||
589 | +// | ||
590 | +template<typename P> | ||
591 | +int bessjyv(P v,P x,P &vm,P *jv,P *yv, | ||
592 | + P *djv,P *dyv) | ||
593 | +{ | ||
594 | + P v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2; | ||
595 | + P r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq; | ||
596 | + P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; | ||
597 | + int j,k,l,m,n,kz; | ||
598 | + | ||
599 | + x2 = x*x; | ||
600 | + n = (int)v; | ||
601 | + v0 = v-n; | ||
602 | + if ((x < 0.0) || (v < 0.0)) return 1; | ||
603 | + if (x < 1e-15) { | ||
604 | + for (k=0;k<=n;k++) { | ||
605 | + jv[k] = 0.0; | ||
606 | + yv[k] = -1e308; | ||
607 | + djv[k] = 0.0; | ||
608 | + dyv[k] = 1e308; | ||
609 | + if (v0 == 0.0) { | ||
610 | + jv[0] = 1.0; | ||
611 | + djv[1] = 0.5; | ||
612 | + } | ||
613 | + else djv[0] = 1e308; | ||
614 | + } | ||
615 | + vm = v; | ||
616 | + return 0; | ||
617 | + } | ||
618 | + if (x <= 12.0) { | ||
619 | + for (l=0;l<2;l++) { | ||
620 | + vl = v0 + l; | ||
621 | + bjvl = 1.0; | ||
622 | + r = 1.0; | ||
623 | + for (k=1;k<=40;k++) { | ||
624 | + r *= -0.25*x2/(k*(k+vl)); | ||
625 | + bjvl += r; | ||
626 | + if (fabs(r) < fabs(bjvl)*1e-15) break; | ||
627 | + } | ||
628 | + vg = 1.0 + vl; | ||
629 | + a = pow(0.5*x,vl)/gamma(vg); | ||
630 | + if (l == 0) bjv0 = bjvl*a; | ||
631 | + else bjv1 = bjvl*a; | ||
632 | + } | ||
633 | + } | ||
634 | + else { | ||
635 | + if (x >= 50.0) kz = 8; | ||
636 | + else if (x >= 35.0) kz = 10; | ||
637 | + else kz = 11; | ||
638 | + for (j=0;j<2;j++) { | ||
639 | + vv = 4.0*(j+v0)*(j+v0); | ||
640 | + px = 1.0; | ||
641 | + rp = 1.0; | ||
642 | + for (k=1;k<=kz;k++) { | ||
643 | + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))* | ||
644 | + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2); | ||
645 | + px += rp; | ||
646 | + } | ||
647 | + qx = 1.0; | ||
648 | + rq = 1.0; | ||
649 | + for (k=1;k<=kz;k++) { | ||
650 | + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))* | ||
651 | + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2); | ||
652 | + qx += rq; | ||
653 | + } | ||
654 | + qx *= 0.125*(vv-1.0)/x; | ||
655 | + xk = x-(0.5*(j+v0)+0.25)*M_PI; | ||
656 | + a0 = sqrt(M_2_PI/x); | ||
657 | + ck = cos(xk); | ||
658 | + sk = sin(xk); | ||
659 | + | ||
660 | + if (j == 0) { | ||
661 | + bjv0 = a0*(px*ck-qx*sk); | ||
662 | + byv0 = a0*(px*sk+qx*ck); | ||
663 | + } | ||
664 | + else if (j == 1) { | ||
665 | + bjv1 = a0*(px*ck-qx*sk); | ||
666 | + byv1 = a0*(px*sk+qx*ck); | ||
667 | + } | ||
668 | + } | ||
669 | + } | ||
670 | + jv[0] = bjv0; | ||
671 | + jv[1] = bjv1; | ||
672 | + djv[0] = v0*jv[0]/x-jv[1]; | ||
673 | + djv[1] = -(1.0+v0)*jv[1]/x+jv[0]; | ||
674 | + if ((n >= 2) && (n <= (int)(0.9*x))) { | ||
675 | + f0 = bjv0; | ||
676 | + f1 = bjv1; | ||
677 | + for (k=2;k<=n;k++) { | ||
678 | + f = 2.0*(k+v0-1.0)*f1/x-f0; | ||
679 | + jv[k] = f; | ||
680 | + f0 = f1; | ||
681 | + f1 = f; | ||
682 | + } | ||
683 | + } | ||
684 | + else if (n >= 2) { | ||
685 | + m = msta1(x,200); | ||
686 | + if (m < n) n = m; | ||
687 | + else m = msta2(x,n,15); | ||
688 | + f2 = 0.0; | ||
689 | + f1 = 1.0e-100; | ||
690 | + for (k=m;k>=0;k--) { | ||
691 | + f = 2.0*(v0+k+1.0)*f1/x-f2; | ||
692 | + if (k <= n) jv[k] = f; | ||
693 | + f2 = f1; | ||
694 | + f1 = f; | ||
695 | + } | ||
696 | + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f; | ||
697 | + else cs = bjv1/f2; | ||
698 | + for (k=0;k<=n;k++) { | ||
699 | + jv[k] *= cs; | ||
700 | + } | ||
701 | + } | ||
702 | + for (k=2;k<=n;k++) { | ||
703 | + djv[k] = -(k+v0)*jv[k]/x+jv[k-1]; | ||
704 | + } | ||
705 | + if (x <= 12.0) { | ||
706 | + if (v0 != 0.0) { | ||
707 | + for (l=0;l<2;l++) { | ||
708 | + vl = v0 +l; | ||
709 | + bjvl = 1.0; | ||
710 | + r = 1.0; | ||
711 | + for (k=1;k<=40;k++) { | ||
712 | + r *= -0.25*x2/(k*(k-vl)); | ||
713 | + bjvl += r; | ||
714 | + if (fabs(r) < fabs(bjvl)*1e-15) break; | ||
715 | + } | ||
716 | + vg = 1.0-vl; | ||
717 | + b = pow(2.0/x,vl)/gamma(vg); | ||
718 | + if (l == 0) bju0 = bjvl*b; | ||
719 | + else bju1 = bjvl*b; | ||
720 | + } | ||
721 | + pv0 = M_PI*v0; | ||
722 | + pv1 = M_PI*(1.0+v0); | ||
723 | + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0); | ||
724 | + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1); | ||
725 | + } | ||
726 | + else { | ||
727 | + ec = log(0.5*x)+el; | ||
728 | + cs0 = 0.0; | ||
729 | + w0 = 0.0; | ||
730 | + r0 = 1.0; | ||
731 | + for (k=1;k<=30;k++) { | ||
732 | + w0 += 1.0/k; | ||
733 | + r0 *= -0.25*x2/(k*k); | ||
734 | + cs0 += r0*w0; | ||
735 | + } | ||
736 | + byv0 = M_2_PI*(ec*bjv0-cs0); | ||
737 | + cs1 = 1.0; | ||
738 | + w1 = 0.0; | ||
739 | + r1 = 1.0; | ||
740 | + for (k=1;k<=30;k++) { | ||
741 | + w1 += 1.0/k; | ||
742 | + r1 *= -0.25*x2/(k*(k+1)); | ||
743 | + cs1 += r1*(2.0*w1+1.0/(k+1.0)); | ||
744 | + } | ||
745 | + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1); | ||
746 | + } | ||
747 | + } | ||
748 | + yv[0] = byv0; | ||
749 | + yv[1] = byv1; | ||
750 | + for (k=2;k<=n;k++) { | ||
751 | + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0; | ||
752 | + yv[k] = byvk; | ||
753 | + byv0 = byv1; | ||
754 | + byv1 = byvk; | ||
755 | + } | ||
756 | + dyv[0] = v0*yv[0]/x-yv[1]; | ||
757 | + for (k=1;k<=n;k++) { | ||
758 | + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1]; | ||
759 | + } | ||
760 | + vm = n + v0; | ||
761 | + return 0; | ||
762 | +} | ||
763 | + | ||
764 | +template<typename P> | ||
765 | +int bessjyv_sph(int v, P z, P &vm, P* cjv, | ||
766 | + P* cyv, P* cjvp, P* cyvp) | ||
767 | +{ | ||
768 | + //first, compute the bessel functions of fractional order | ||
769 | + bessjyv<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | ||
770 | + | ||
771 | + //iterate through each and scale | ||
772 | + for(int n = 0; n<=v; n++) | ||
773 | + { | ||
774 | + | ||
775 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | ||
776 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | ||
777 | + | ||
778 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | ||
779 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | ||
780 | + } | ||
781 | + | ||
782 | + return 0; | ||
783 | + | ||
784 | +} | ||
785 | + | ||
786 | +template<typename P> | ||
787 | +int cbessjy01(complex<P> z,complex<P> &cj0,complex<P> &cj1, | ||
788 | + complex<P> &cy0,complex<P> &cy1,complex<P> &cj0p, | ||
789 | + complex<P> &cj1p,complex<P> &cy0p,complex<P> &cy1p) | ||
790 | +{ | ||
791 | + complex<P> z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu; | ||
792 | + P a0,w0,w1; | ||
793 | + int k,kz; | ||
794 | + | ||
795 | + static P a[] = { | ||
796 | + -7.03125e-2, | ||
797 | + 0.112152099609375, | ||
798 | + -0.5725014209747314, | ||
799 | + 6.074042001273483, | ||
800 | + -1.100171402692467e2, | ||
801 | + 3.038090510922384e3, | ||
802 | + -1.188384262567832e5, | ||
803 | + 6.252951493434797e6, | ||
804 | + -4.259392165047669e8, | ||
805 | + 3.646840080706556e10, | ||
806 | + -3.833534661393944e12, | ||
807 | + 4.854014686852901e14, | ||
808 | + -7.286857349377656e16, | ||
809 | + 1.279721941975975e19}; | ||
810 | + static P b[] = { | ||
811 | + 7.32421875e-2, | ||
812 | + -0.2271080017089844, | ||
813 | + 1.727727502584457, | ||
814 | + -2.438052969955606e1, | ||
815 | + 5.513358961220206e2, | ||
816 | + -1.825775547429318e4, | ||
817 | + 8.328593040162893e5, | ||
818 | + -5.006958953198893e7, | ||
819 | + 3.836255180230433e9, | ||
820 | + -3.649010818849833e11, | ||
821 | + 4.218971570284096e13, | ||
822 | + -5.827244631566907e15, | ||
823 | + 9.476288099260110e17, | ||
824 | + -1.792162323051699e20}; | ||
825 | + static P a1[] = { | ||
826 | + 0.1171875, | ||
827 | + -0.1441955566406250, | ||
828 | + 0.6765925884246826, | ||
829 | + -6.883914268109947, | ||
830 | + 1.215978918765359e2, | ||
831 | + -3.302272294480852e3, | ||
832 | + 1.276412726461746e5, | ||
833 | + -6.656367718817688e6, | ||
834 | + 4.502786003050393e8, | ||
835 | + -3.833857520742790e10, | ||
836 | + 4.011838599133198e12, | ||
837 | + -5.060568503314727e14, | ||
838 | + 7.572616461117958e16, | ||
839 | + -1.326257285320556e19}; | ||
840 | + static P b1[] = { | ||
841 | + -0.1025390625, | ||
842 | + 0.2775764465332031, | ||
843 | + -1.993531733751297, | ||
844 | + 2.724882731126854e1, | ||
845 | + -6.038440767050702e2, | ||
846 | + 1.971837591223663e4, | ||
847 | + -8.902978767070678e5, | ||
848 | + 5.310411010968522e7, | ||
849 | + -4.043620325107754e9, | ||
850 | + 3.827011346598605e11, | ||
851 | + -4.406481417852278e13, | ||
852 | + 6.065091351222699e15, | ||
853 | + -9.833883876590679e17, | ||
854 | + 1.855045211579828e20}; | ||
855 | + | ||
856 | + a0 = abs(z); | ||
857 | + z2 = z*z; | ||
858 | + z1 = z; | ||
859 | + if (a0 == 0.0) { | ||
860 | + cj0 = cone; | ||
861 | + cj1 = czero; | ||
862 | + cy0 = complex<P>(-1e308,0); | ||
863 | + cy1 = complex<P>(-1e308,0); | ||
864 | + cj0p = czero; | ||
865 | + cj1p = complex<P>(0.5,0.0); | ||
866 | + cy0p = complex<P>(1e308,0); | ||
867 | + cy1p = complex<P>(1e308,0); | ||
868 | + return 0; | ||
869 | + } | ||
870 | + if (real(z) < 0.0) z1 = -z; | ||
871 | + if (a0 <= 12.0) { | ||
872 | + cj0 = cone; | ||
873 | + cr = cone; | ||
874 | + for (k=1;k<=40;k++) { | ||
875 | + cr *= -0.25*z2/(P)(k*k); | ||
876 | + cj0 += cr; | ||
877 | + if (abs(cr) < abs(cj0)*eps) break; | ||
878 | + } | ||
879 | + cj1 = cone; | ||
880 | + cr = cone; | ||
881 | + for (k=1;k<=40;k++) { | ||
882 | + cr *= -0.25*z2/(k*(k+1.0)); | ||
883 | + cj1 += cr; | ||
884 | + if (abs(cr) < abs(cj1)*eps) break; | ||
885 | + } | ||
886 | + cj1 *= 0.5*z1; | ||
887 | + w0 = 0.0; | ||
888 | + cr = cone; | ||
889 | + cs = czero; | ||
890 | + for (k=1;k<=40;k++) { | ||
891 | + w0 += 1.0/k; | ||
892 | + cr *= -0.25*z2/(P)(k*k); | ||
893 | + cp = cr*w0; | ||
894 | + cs += cp; | ||
895 | + if (abs(cp) < abs(cs)*eps) break; | ||
896 | + } | ||
897 | + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs); | ||
898 | + w1 = 0.0; | ||
899 | + cr = cone; | ||
900 | + cs = cone; | ||
901 | + for (k=1;k<=40;k++) { | ||
902 | + w1 += 1.0/k; | ||
903 | + cr *= -0.25*z2/(k*(k+1.0)); | ||
904 | + cp = cr*(2.0*w1+1.0/(k+1.0)); | ||
905 | + cs += cp; | ||
906 | + if (abs(cp) < abs(cs)*eps) break; | ||
907 | + } | ||
908 | + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs); | ||
909 | + } | ||
910 | + else { | ||
911 | + if (a0 >= 50.0) kz = 8; // can be changed to 10 | ||
912 | + else if (a0 >= 35.0) kz = 10; // " " " 12 | ||
913 | + else kz = 12; // " " " 14 | ||
914 | + ct1 = z1 - M_PI_4; | ||
915 | + cp0 = cone; | ||
916 | + for (k=0;k<kz;k++) { | ||
917 | + cp0 += a[k]*pow(z1,-2.0*k-2.0); | ||
918 | + } | ||
919 | + cq0 = -0.125/z1; | ||
920 | + for (k=0;k<kz;k++) { | ||
921 | + cq0 += b[k]*pow(z1,-2.0*k-3.0); | ||
922 | + } | ||
923 | + cu = sqrt(M_2_PI/z1); | ||
924 | + cj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); | ||
925 | + cy0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); | ||
926 | + ct2 = z1 - 0.75*M_PI; | ||
927 | + cp1 = cone; | ||
928 | + for (k=0;k<kz;k++) { | ||
929 | + cp1 += a1[k]*pow(z1,-2.0*k-2.0); | ||
930 | + } | ||
931 | + cq1 = 0.375/z1; | ||
932 | + for (k=0;k<kz;k++) { | ||
933 | + cq1 += b1[k]*pow(z1,-2.0*k-3.0); | ||
934 | + } | ||
935 | + cj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); | ||
936 | + cy1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); | ||
937 | + } | ||
938 | + if (real(z) < 0.0) { | ||
939 | + if (imag(z) < 0.0) { | ||
940 | + cy0 -= 2.0*cii*cj0; | ||
941 | + cy1 = -(cy1-2.0*cii*cj1); | ||
942 | + } | ||
943 | + else if (imag(z) > 0.0) { | ||
944 | + cy0 += 2.0*cii*cj0; | ||
945 | + cy1 = -(cy1+2.0*cii*cj1); | ||
946 | + } | ||
947 | + cj1 = -cj1; | ||
948 | + } | ||
949 | + cj0p = -cj1; | ||
950 | + cj1p = cj0-cj1/z; | ||
951 | + cy0p = -cy1; | ||
952 | + cy1p = cy0-cy1/z; | ||
953 | + return 0; | ||
954 | +} | ||
955 | + | ||
956 | +template<typename P> | ||
957 | +int cbessjyna(int n,complex<P> z,int &nm,complex<P> *cj, | ||
958 | + complex<P> *cy,complex<P> *cjp,complex<P> *cyp) | ||
959 | +{ | ||
960 | + complex<P> cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2; | ||
961 | + complex<P> cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22; | ||
962 | + complex<P> ch0,ch1,ch2; | ||
963 | + P a0,yak,ya1,ya0,wa; | ||
964 | + int m,k,lb,lb0; | ||
965 | + | ||
966 | + if (n < 0) return 1; | ||
967 | + a0 = abs(z); | ||
968 | + nm = n; | ||
969 | + if (a0 < 1.0e-100) { | ||
970 | + for (k=0;k<=n;k++) { | ||
971 | + cj[k] = czero; | ||
972 | + cy[k] = complex<P> (-1e308,0); | ||
973 | + cjp[k] = czero; | ||
974 | + cyp[k] = complex<P>(1e308,0); | ||
975 | + } | ||
976 | + cj[0] = cone; | ||
977 | + cjp[1] = complex<P>(0.5,0.0); | ||
978 | + return 0; | ||
979 | + } | ||
980 | + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]); | ||
981 | + cbj0 = cj[0]; | ||
982 | + cbj1 = cj[1]; | ||
983 | + cby0 = cy[0]; | ||
984 | + cby1 = cy[1]; | ||
985 | + if (n <= 1) return 0; | ||
986 | + if (n < (int)0.25*a0) { | ||
987 | + cj0 = cbj0; | ||
988 | + cj1 = cbj1; | ||
989 | + for (k=2;k<=n;k++) { | ||
990 | + cjk = 2.0*(k-1.0)*cj1/z-cj0; | ||
991 | + cj[k] = cjk; | ||
992 | + cj0 = cj1; | ||
993 | + cj1 = cjk; | ||
994 | + } | ||
995 | + } | ||
996 | + else { | ||
997 | + m = msta1(a0,200); | ||
998 | + if (m < n) nm = m; | ||
999 | + else m = msta2(a0,n,15); | ||
1000 | + cf2 = czero; | ||
1001 | + cf1 = complex<P> (1.0e-100,0.0); | ||
1002 | + for (k=m;k>=0;k--) { | ||
1003 | + cf = 2.0*(k+1.0)*cf1/z-cf2; | ||
1004 | + if (k <=nm) cj[k] = cf; | ||
1005 | + cf2 = cf1; | ||
1006 | + cf1 = cf; | ||
1007 | + } | ||
1008 | + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf; | ||
1009 | + else cs = cbj1/cf2; | ||
1010 | + for (k=0;k<=nm;k++) { | ||
1011 | + cj[k] *= cs; | ||
1012 | + } | ||
1013 | + } | ||
1014 | + for (k=2;k<=nm;k++) { | ||
1015 | + cjp[k] = cj[k-1]-(P)k*cj[k]/z; | ||
1016 | + } | ||
1017 | + ya0 = abs(cby0); | ||
1018 | + lb = 0; | ||
1019 | + cg0 = cby0; | ||
1020 | + cg1 = cby1; | ||
1021 | + for (k=2;k<=nm;k++) { | ||
1022 | + cyk = 2.0*(k-1.0)*cg1/z-cg0; | ||
1023 | + yak = abs(cyk); | ||
1024 | + ya1 = abs(cg0); | ||
1025 | + if ((yak < ya0) && (yak < ya1)) lb = k; | ||
1026 | + cy[k] = cyk; | ||
1027 | + cg0 = cg1; | ||
1028 | + cg1 = cyk; | ||
1029 | + } | ||
1030 | + lb0 = 0; | ||
1031 | + if ((lb > 4) && (imag(z) != 0.0)) { | ||
1032 | + while (lb != lb0) { | ||
1033 | + ch2 = cone; | ||
1034 | + ch1 = czero; | ||
1035 | + lb0 = lb; | ||
1036 | + for (k=lb;k>=1;k--) { | ||
1037 | + ch0 = 2.0*k*ch1/z-ch2; | ||
1038 | + ch2 = ch1; | ||
1039 | + ch1 = ch0; | ||
1040 | + } | ||
1041 | + cp12 = ch0; | ||
1042 | + cp22 = ch2; | ||
1043 | + ch2 = czero; | ||
1044 | + ch1 = cone; | ||
1045 | + for (k=lb;k>=1;k--) { | ||
1046 | + ch0 = 2.0*k*ch1/z-ch2; | ||
1047 | + ch2 = ch1; | ||
1048 | + ch1 = ch0; | ||
1049 | + } | ||
1050 | + cp11 = ch0; | ||
1051 | + cp21 = ch2; | ||
1052 | + if (lb == nm) | ||
1053 | + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1]; | ||
1054 | + if (abs(cj[0]) > abs(cj[1])) { | ||
1055 | + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0]; | ||
1056 | + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0]; | ||
1057 | + } | ||
1058 | + else { | ||
1059 | + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1]; | ||
1060 | + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1]; | ||
1061 | + } | ||
1062 | + cyl2 = cy[lb+1]; | ||
1063 | + cyl1 = cy[lb]; | ||
1064 | + for (k=lb-1;k>=0;k--) { | ||
1065 | + cylk = 2.0*(k+1.0)*cyl1/z-cyl2; | ||
1066 | + cy[k] = cylk; | ||
1067 | + cyl2 = cyl1; | ||
1068 | + cyl1 = cylk; | ||
1069 | + } | ||
1070 | + cyl1 = cy[lb]; | ||
1071 | + cyl2 = cy[lb+1]; | ||
1072 | + for (k=lb+1;k<n;k++) { | ||
1073 | + cylk = 2.0*k*cyl2/z-cyl1; | ||
1074 | + cy[k+1] = cylk; | ||
1075 | + cyl1 = cyl2; | ||
1076 | + cyl2 = cylk; | ||
1077 | + } | ||
1078 | + for (k=2;k<=nm;k++) { | ||
1079 | + wa = abs(cy[k]); | ||
1080 | + if (wa < abs(cy[k-1])) lb = k; | ||
1081 | + } | ||
1082 | + } | ||
1083 | + } | ||
1084 | + for (k=2;k<=nm;k++) { | ||
1085 | + cyp[k] = cy[k-1]-(P)k*cy[k]/z; | ||
1086 | + } | ||
1087 | + return 0; | ||
1088 | +} | ||
1089 | + | ||
1090 | +template<typename P> | ||
1091 | +int cbessjynb(int n,complex<P> z,int &nm,complex<P> *cj, | ||
1092 | + complex<P> *cy,complex<P> *cjp,complex<P> *cyp) | ||
1093 | +{ | ||
1094 | + complex<P> cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce; | ||
1095 | + complex<P> ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1; | ||
1096 | + complex<P> cyy,cbjk,ct2; | ||
1097 | + P a0,y0; | ||
1098 | + int k,m; | ||
1099 | + static P a[] = { | ||
1100 | + -0.7031250000000000e-1, | ||
1101 | + 0.1121520996093750, | ||
1102 | + -0.5725014209747314, | ||
1103 | + 6.074042001273483}; | ||
1104 | + static P b[] = { | ||
1105 | + 0.7324218750000000e-1, | ||
1106 | + -0.2271080017089844, | ||
1107 | + 1.727727502584457, | ||
1108 | + -2.438052969955606e1}; | ||
1109 | + static P a1[] = { | ||
1110 | + 0.1171875, | ||
1111 | + -0.1441955566406250, | ||
1112 | + 0.6765925884246826, | ||
1113 | + -6.883914268109947}; | ||
1114 | + static P b1[] = { | ||
1115 | + -0.1025390625, | ||
1116 | + 0.2775764465332031, | ||
1117 | + -1.993531733751297, | ||
1118 | + 2.724882731126854e1}; | ||
1119 | + | ||
1120 | + y0 = abs(imag(z)); | ||
1121 | + a0 = abs(z); | ||
1122 | + nm = n; | ||
1123 | + if (a0 < 1.0e-100) { | ||
1124 | + for (k=0;k<=n;k++) { | ||
1125 | + cj[k] = czero; | ||
1126 | + cy[k] = complex<P> (-1e308,0); | ||
1127 | + cjp[k] = czero; | ||
1128 | + cyp[k] = complex<P>(1e308,0); | ||
1129 | + } | ||
1130 | + cj[0] = cone; | ||
1131 | + cjp[1] = complex<P>(0.5,0.0); | ||
1132 | + return 0; | ||
1133 | + } | ||
1134 | + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) { | ||
1135 | + if (n == 0) nm = 1; | ||
1136 | + m = msta1(a0,200); | ||
1137 | + if (m < nm) nm = m; | ||
1138 | + else m = msta2(a0,nm,15); | ||
1139 | + cbs = czero; | ||
1140 | + csu = czero; | ||
1141 | + csv = czero; | ||
1142 | + cf2 = czero; | ||
1143 | + cf1 = complex<P> (1.0e-100,0.0); | ||
1144 | + for (k=m;k>=0;k--) { | ||
1145 | + cf = 2.0*(k+1.0)*cf1/z-cf2; | ||
1146 | + if (k <= nm) cj[k] = cf; | ||
1147 | + if (((k & 1) == 0) && (k != 0)) { | ||
1148 | + if (y0 <= 1.0) { | ||
1149 | + cbs += 2.0*cf; | ||
1150 | + } | ||
1151 | + else { | ||
1152 | + cbs += (-1)*((k & 2)-1)*2.0*cf; | ||
1153 | + } | ||
1154 | + csu += (P)((-1)*((k & 2)-1))*cf/(P)k; | ||
1155 | + } | ||
1156 | + else if (k > 1) { | ||
1157 | + csv += (P)((-1)*((k & 2)-1)*k)*cf/(P)(k*k-1.0); | ||
1158 | + } | ||
1159 | + cf2 = cf1; | ||
1160 | + cf1 = cf; | ||
1161 | + } | ||
1162 | + if (y0 <= 1.0) cs0 = cbs+cf; | ||
1163 | + else cs0 = (cbs+cf)/cos(z); | ||
1164 | + for (k=0;k<=nm;k++) { | ||
1165 | + cj[k] /= cs0; | ||
1166 | + } | ||
1167 | + ce = log(0.5*z)+el; | ||
1168 | + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0); | ||
1169 | + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0); | ||
1170 | + } | ||
1171 | + else { | ||
1172 | + ct1 = z-M_PI_4; | ||
1173 | + cp0 = cone; | ||
1174 | + for (k=0;k<4;k++) { | ||
1175 | + cp0 += a[k]*pow(z,-2.0*k-2.0); | ||
1176 | + } | ||
1177 | + cq0 = -0.125/z; | ||
1178 | + for (k=0;k<4;k++) { | ||
1179 | + cq0 += b[k] *pow(z,-2.0*k-3.0); | ||
1180 | + } | ||
1181 | + cu = sqrt(M_2_PI/z); | ||
1182 | + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); | ||
1183 | + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); | ||
1184 | + cj[0] = cbj0; | ||
1185 | + cy[0] = cby0; | ||
1186 | + ct2 = z-0.75*M_PI; | ||
1187 | + cp1 = cone; | ||
1188 | + for (k=0;k<4;k++) { | ||
1189 | + cp1 += a1[k]*pow(z,-2.0*k-2.0); | ||
1190 | + } | ||
1191 | + cq1 = 0.375/z; | ||
1192 | + for (k=0;k<4;k++) { | ||
1193 | + cq1 += b1[k]*pow(z,-2.0*k-3.0); | ||
1194 | + } | ||
1195 | + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); | ||
1196 | + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); | ||
1197 | + cj[1] = cbj1; | ||
1198 | + cy[1] = cby1; | ||
1199 | + for (k=2;k<=n;k++) { | ||
1200 | + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0; | ||
1201 | + cj[k] = cbjk; | ||
1202 | + cbj0 = cbj1; | ||
1203 | + cbj1 = cbjk; | ||
1204 | + } | ||
1205 | + } | ||
1206 | + cjp[0] = -cj[1]; | ||
1207 | + for (k=1;k<=nm;k++) { | ||
1208 | + cjp[k] = cj[k-1]-(P)k*cj[k]/z; | ||
1209 | + } | ||
1210 | + if (abs(cj[0]) > 1.0) | ||
1211 | + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0]; | ||
1212 | + for (k=2;k<=nm;k++) { | ||
1213 | + if (abs(cj[k-1]) >= abs(cj[k-2])) | ||
1214 | + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1]; | ||
1215 | + else | ||
1216 | + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2]; | ||
1217 | + cy[k] = cyy; | ||
1218 | + } | ||
1219 | + cyp[0] = -cy[1]; | ||
1220 | + for (k=1;k<=nm;k++) { | ||
1221 | + cyp[k] = cy[k-1]-(P)k*cy[k]/z; | ||
1222 | + } | ||
1223 | + | ||
1224 | + return 0; | ||
1225 | +} | ||
1226 | + | ||
1227 | +template<typename P> | ||
1228 | +int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, | ||
1229 | + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp) | ||
1230 | +{ | ||
1231 | + complex<P> z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp; | ||
1232 | + complex<P> cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb; | ||
1233 | + complex<P> cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2; | ||
1234 | + complex<P> cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22; | ||
1235 | + complex<P> ch0,ch1,ch2,cyl1,cyl2,cylk; | ||
1236 | + | ||
1237 | + P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; | ||
1238 | + int j,n,k,kz,l,lb,lb0,m; | ||
1239 | + | ||
1240 | + a0 = abs(z); | ||
1241 | + z1 = z; | ||
1242 | + z2 = z*z; | ||
1243 | + n = (int)v; | ||
1244 | + | ||
1245 | + | ||
1246 | + v0 = v-n; | ||
1247 | + | ||
1248 | + pv0 = M_PI*v0; | ||
1249 | + pv1 = M_PI*(1.0+v0); | ||
1250 | + if (a0 < 1.0e-100) { | ||
1251 | + for (k=0;k<=n;k++) { | ||
1252 | + cjv[k] = czero; | ||
1253 | + cyv[k] = complex<P> (-1e308,0); | ||
1254 | + cjvp[k] = czero; | ||
1255 | + cyvp[k] = complex<P> (1e308,0); | ||
1256 | + | ||
1257 | + } | ||
1258 | + if (v0 == 0.0) { | ||
1259 | + cjv[0] = cone; | ||
1260 | + cjvp[1] = complex<P> (0.5,0.0); | ||
1261 | + } | ||
1262 | + else { | ||
1263 | + cjvp[0] = complex<P> (1e308,0); | ||
1264 | + } | ||
1265 | + vm = v; | ||
1266 | + return 0; | ||
1267 | + } | ||
1268 | + if (real(z1) < 0.0) z1 = -z; | ||
1269 | + if (a0 <= 12.0) { | ||
1270 | + for (l=0;l<2;l++) { | ||
1271 | + vl = v0+l; | ||
1272 | + cjvl = cone; | ||
1273 | + cr = cone; | ||
1274 | + for (k=1;k<=40;k++) { | ||
1275 | + cr *= -0.25*z2/(k*(k+vl)); | ||
1276 | + cjvl += cr; | ||
1277 | + if (abs(cr) < abs(cjvl)*eps) break; | ||
1278 | + } | ||
1279 | + vg = 1.0 + vl; | ||
1280 | + ga = gamma(vg); | ||
1281 | + ca = pow(0.5*z1,vl)/ga; | ||
1282 | + if (l == 0) cjv0 = cjvl*ca; | ||
1283 | + else cjv1 = cjvl*ca; | ||
1284 | + } | ||
1285 | + } | ||
1286 | + else { | ||
1287 | + if (a0 >= 50.0) kz = 8; | ||
1288 | + else if (a0 >= 35.0) kz = 10; | ||
1289 | + else kz = 11; | ||
1290 | + for (j=0;j<2;j++) { | ||
1291 | + vv = 4.0*(j+v0)*(j+v0); | ||
1292 | + cpz = cone; | ||
1293 | + crp = cone; | ||
1294 | + for (k=1;k<=kz;k++) { | ||
1295 | + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))* | ||
1296 | + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2); | ||
1297 | + cpz += crp; | ||
1298 | + } | ||
1299 | + cqz = cone; | ||
1300 | + crq = cone; | ||
1301 | + for (k=1;k<=kz;k++) { | ||
1302 | + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))* | ||
1303 | + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2); | ||
1304 | + cqz += crq; | ||
1305 | + } | ||
1306 | + cqz *= 0.125*(vv-1.0)/z1; | ||
1307 | + zk = z1-(0.5*(j+v0)+0.25)*M_PI; | ||
1308 | + ca0 = sqrt(M_2_PI/z1); | ||
1309 | + cck = cos(zk); | ||
1310 | + csk = sin(zk); | ||
1311 | + if (j == 0) { | ||
1312 | + cjv0 = ca0*(cpz*cck-cqz*csk); | ||
1313 | + cyv0 = ca0*(cpz*csk+cqz+cck); | ||
1314 | + } | ||
1315 | + else { | ||
1316 | + cjv1 = ca0*(cpz*cck-cqz*csk); | ||
1317 | + cyv1 = ca0*(cpz*csk+cqz*cck); | ||
1318 | + } | ||
1319 | + } | ||
1320 | + } | ||
1321 | + if (a0 <= 12.0) { | ||
1322 | + if (v0 != 0.0) { | ||
1323 | + for (l=0;l<2;l++) { | ||
1324 | + vl = v0+l; | ||
1325 | + cjvl = cone; | ||
1326 | + cr = cone; | ||
1327 | + for (k=1;k<=40;k++) { | ||
1328 | + cr *= -0.25*z2/(k*(k-vl)); | ||
1329 | + cjvl += cr; | ||
1330 | + if (abs(cr) < abs(cjvl)*eps) break; | ||
1331 | + } | ||
1332 | + vg = 1.0-vl; | ||
1333 | + gb = gamma(vg); | ||
1334 | + cb = pow(2.0/z1,vl)/gb; | ||
1335 | + if (l == 0) cju0 = cjvl*cb; | ||
1336 | + else cju1 = cjvl*cb; | ||
1337 | + } | ||
1338 | + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0); | ||
1339 | + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1); | ||
1340 | + } | ||
1341 | + else { | ||
1342 | + cec = log(0.5*z1)+el; | ||
1343 | + cs0 = czero; | ||
1344 | + w0 = 0.0; | ||
1345 | + cr0 = cone; | ||
1346 | + for (k=1;k<=30;k++) { | ||
1347 | + w0 += 1.0/k; | ||
1348 | + cr0 *= -0.25*z2/(P)(k*k); | ||
1349 | + cs0 += cr0*w0; | ||
1350 | + } | ||
1351 | + cyv0 = M_2_PI*(cec*cjv0-cs0); | ||
1352 | + cs1 = cone; | ||
1353 | + w1 = 0.0; | ||
1354 | + cr1 = cone; | ||
1355 | + for (k=1;k<=30;k++) { | ||
1356 | + w1 += 1.0/k; | ||
1357 | + cr1 *= -0.25*z2/(k*(k+1.0)); | ||
1358 | + cs1 += cr1*(2.0*w1+1.0/(k+1.0)); | ||
1359 | + } | ||
1360 | + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); | ||
1361 | + } | ||
1362 | + } | ||
1363 | + if (real(z) < 0.0) { | ||
1364 | + cfac0 = exp(pv0*cii); | ||
1365 | + cfac1 = exp(pv1*cii); | ||
1366 | + if (imag(z) < 0.0) { | ||
1367 | + cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; | ||
1368 | + cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; | ||
1369 | + cjv0 /= cfac0; | ||
1370 | + cjv1 /= cfac1; | ||
1371 | + } | ||
1372 | + else if (imag(z) > 0.0) { | ||
1373 | + cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; | ||
1374 | + cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; | ||
1375 | + cjv0 *= cfac0; | ||
1376 | + cjv1 *= cfac1; | ||
1377 | + } | ||
1378 | + } | ||
1379 | + cjv[0] = cjv0; | ||
1380 | + cjv[1] = cjv1; | ||
1381 | + if ((n >= 2) && (n <= (int)(0.25*a0))) { | ||
1382 | + cf0 = cjv0; | ||
1383 | + cf1 = cjv1; | ||
1384 | + for (k=2;k<= n;k++) { | ||
1385 | + cf = 2.0*(k+v0-1.0)*cf1/z-cf0; | ||
1386 | + cjv[k] = cf; | ||
1387 | + cf0 = cf1; | ||
1388 | + cf1 = cf; | ||
1389 | + } | ||
1390 | + } | ||
1391 | + else if (n >= 2) { | ||
1392 | + m = msta1(a0,200); | ||
1393 | + if (m < n) n = m; | ||
1394 | + else m = msta2(a0,n,15); | ||
1395 | + cf2 = czero; | ||
1396 | + cf1 = complex<P>(1.0e-100,0.0); | ||
1397 | + for (k=m;k>=0;k--) { | ||
1398 | + cf = 2.0*(v0+k+1.0)*cf1/z-cf2; | ||
1399 | + if (k <= n) cjv[k] = cf; | ||
1400 | + cf2 = cf1; | ||
1401 | + cf1 = cf; | ||
1402 | + } | ||
1403 | + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; | ||
1404 | + else cs = cjv1/cf2; | ||
1405 | + for (k=0;k<=n;k++) { | ||
1406 | + cjv[k] *= cs; | ||
1407 | + } | ||
1408 | + } | ||
1409 | + cjvp[0] = v0*cjv[0]/z-cjv[1]; | ||
1410 | + for (k=1;k<=n;k++) { | ||
1411 | + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1]; | ||
1412 | + } | ||
1413 | + cyv[0] = cyv0; | ||
1414 | + cyv[1] = cyv1; | ||
1415 | + ya0 = abs(cyv0); | ||
1416 | + lb = 0; | ||
1417 | + cg0 = cyv0; | ||
1418 | + cg1 = cyv1; | ||
1419 | + for (k=2;k<=n;k++) { | ||
1420 | + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; | ||
1421 | + yak = abs(cyk); | ||
1422 | + ya1 = abs(cg0); | ||
1423 | + if ((yak < ya0) && (yak< ya1)) lb = k; | ||
1424 | + cyv[k] = cyk; | ||
1425 | + cg0 = cg1; | ||
1426 | + cg1 = cyk; | ||
1427 | + } | ||
1428 | + lb0 = 0; | ||
1429 | + if ((lb > 4) && (imag(z) != 0.0)) { | ||
1430 | + while(lb != lb0) { | ||
1431 | + ch2 = cone; | ||
1432 | + ch1 = czero; | ||
1433 | + lb0 = lb; | ||
1434 | + for (k=lb;k>=1;k--) { | ||
1435 | + ch0 = 2.0*(k+v0)*ch1/z-ch2; | ||
1436 | + ch2 = ch1; | ||
1437 | + ch1 = ch0; | ||
1438 | + } | ||
1439 | + cp12 = ch0; | ||
1440 | + cp22 = ch2; | ||
1441 | + ch2 = czero; | ||
1442 | + ch1 = cone; | ||
1443 | + for (k=lb;k>=1;k--) { | ||
1444 | + ch0 = 2.0*(k+v0)*ch1/z-ch2; | ||
1445 | + ch2 = ch1; | ||
1446 | + ch1 = ch0; | ||
1447 | + } | ||
1448 | + cp11 = ch0; | ||
1449 | + cp21 = ch2; | ||
1450 | + if (lb == n) | ||
1451 | + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; | ||
1452 | + if (abs(cjv[0]) > abs(cjv[1])) { | ||
1453 | + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; | ||
1454 | + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; | ||
1455 | + } | ||
1456 | + else { | ||
1457 | + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1]; | ||
1458 | + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1]; | ||
1459 | + } | ||
1460 | + cyl2 = cyv[lb+1]; | ||
1461 | + cyl1 = cyv[lb]; | ||
1462 | + for (k=lb-1;k>=0;k--) { | ||
1463 | + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2; | ||
1464 | + cyv[k] = cylk; | ||
1465 | + cyl2 = cyl1; | ||
1466 | + cyl1 = cylk; | ||
1467 | + } | ||
1468 | + cyl1 = cyv[lb]; | ||
1469 | + cyl2 = cyv[lb+1]; | ||
1470 | + for (k=lb+1;k<n;k++) { | ||
1471 | + cylk = 2.0*(k+v0)*cyl2/z-cyl1; | ||
1472 | + cyv[k+1] = cylk; | ||
1473 | + cyl1 = cyl2; | ||
1474 | + cyl2 = cylk; | ||
1475 | + } | ||
1476 | + for (k=2;k<=n;k++) { | ||
1477 | + wa = abs(cyv[k]); | ||
1478 | + if (wa < abs(cyv[k-1])) lb = k; | ||
1479 | + } | ||
1480 | + } | ||
1481 | + } | ||
1482 | + cyvp[0] = v0*cyv[0]/z-cyv[1]; | ||
1483 | + for (k=1;k<=n;k++) { | ||
1484 | + cyvp[k] = cyv[k-1]-(k+v0)*cyv[k]/z; | ||
1485 | + } | ||
1486 | + vm = n+v0; | ||
1487 | + return 0; | ||
1488 | +} | ||
1489 | + | ||
1490 | +template<typename P> | ||
1491 | +int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv, | ||
1492 | + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp) | ||
1493 | +{ | ||
1494 | + //first, compute the bessel functions of fractional order | ||
1495 | + cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | ||
1496 | + | ||
1497 | + //iterate through each and scale | ||
1498 | + for(int n = 0; n<=v; n++) | ||
1499 | + { | ||
1500 | + | ||
1501 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | ||
1502 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | ||
1503 | + | ||
1504 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | ||
1505 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | ||
1506 | + } | ||
1507 | + | ||
1508 | + return 0; | ||
1509 | + | ||
1510 | +} | ||
1511 | + | ||
1512 | +} //end namespace rts | ||
1513 | + | ||
1514 | + | ||
1515 | +#endif |
stim/math/filters/gauss3.h
@@ -13,7 +13,7 @@ namespace stim | @@ -13,7 +13,7 @@ namespace stim | ||
13 | ///@param dimx is the size of in* in the z direction. | 13 | ///@param dimx is the size of in* in the z direction. |
14 | ///@param stdx is the standard deviation (in pixels) along the x axis. | 14 | ///@param stdx is the standard deviation (in pixels) along the x axis. |
15 | ///@param stdy is the standard deviation (in pixels) along the y axis. | 15 | ///@param stdy is the standard deviation (in pixels) along the y axis. |
16 | - ///@param nstds specifies the number of standard deviations of the Gaussian that will be k ept in the kernel. | 16 | + ///@param nstds specifies the number of standard deviations of the Gaussian that will be kept in the kernel. |
17 | template<typename T, typename K> | 17 | template<typename T, typename K> |
18 | void cpu_gauss3(T* in, K dimx, K dimy, K dimz, K stdx, K stdy, K stdz, size_t nstds = 3) | 18 | void cpu_gauss3(T* in, K dimx, K dimy, K dimz, K stdx, K stdy, K stdz, size_t nstds = 3) |
19 | { | 19 | { |
stim/math/matrix.h
@@ -37,6 +37,20 @@ struct matrix | @@ -37,6 +37,20 @@ struct matrix | ||
37 | return *this; | 37 | return *this; |
38 | } | 38 | } |
39 | 39 | ||
40 | + //create a symmetric matrix given the rhs values, given in column-major order | ||
41 | + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ | ||
42 | + const size_t L = (N*N+N)/2; //store the number of values | ||
43 | + | ||
44 | + size_t r, c; | ||
45 | + r = c = 0; | ||
46 | + for(size_t i = 0; i < L; i++){ //for each value | ||
47 | + if(r == c) M[c * N + r] = rhs[i]; | ||
48 | + else M[c*N + r] = M[r * N + c] = rhs[i]; | ||
49 | + r++; | ||
50 | + if(r == N) r = ++c; | ||
51 | + } | ||
52 | + } | ||
53 | + | ||
40 | CUDA_CALLABLE T& operator()(int row, int col) | 54 | CUDA_CALLABLE T& operator()(int row, int col) |
41 | { | 55 | { |
42 | return M[col * N + row]; | 56 | return M[col * N + row]; |
@@ -91,6 +105,14 @@ struct matrix | @@ -91,6 +105,14 @@ struct matrix | ||
91 | 105 | ||
92 | return ss.str(); | 106 | return ss.str(); |
93 | } | 107 | } |
108 | + | ||
109 | + static matrix<T, N> identity() { | ||
110 | + matrix<T, N> I; | ||
111 | + I = 0; | ||
112 | + for (size_t i = 0; i < N; i++) | ||
113 | + I.M[i * N + i] = 1; | ||
114 | + return I; | ||
115 | + } | ||
94 | }; | 116 | }; |
95 | 117 | ||
96 | } //end namespace rts | 118 | } //end namespace rts |
1 | +#ifndef STIM_MATRIX_SYM_H | ||
2 | +#define STIM_MATRIX_SYM_H | ||
3 | + | ||
4 | +#include <stim/cuda/cudatools/callable.h> | ||
5 | +#include <stim/math/matrix.h> | ||
6 | + | ||
/* This class represents a symmetric D x D matrix (a symmetric rank-2 tensor), suitable
for representing tensor fields such as structure and diffusion tensors
*/
10 | +namespace stim{ | ||
11 | + | ||
12 | +template <typename T, int D> | ||
13 | +class matrix_sym{ | ||
14 | + | ||
15 | +protected: | ||
16 | + //values are stored in column-major order as a lower-triangular matrix | ||
17 | + T M[D*(D + 1)/2]; | ||
18 | + | ||
19 | + static size_t idx(size_t r, size_t c) { | ||
20 | + //if the index is in the upper-triangular portion, swap the indices | ||
21 | + if(r < c){ | ||
22 | + size_t t = r; | ||
23 | + r = c; | ||
24 | + c = t; | ||
25 | + } | ||
26 | + | ||
27 | + size_t ci = (c + 1) * (D + (D - c))/2 - 1; //index to the end of column c | ||
28 | + size_t i = ci - (D - r - 1); | ||
29 | + return i; | ||
30 | + } | ||
31 | + | ||
32 | + //calculate the row and column given an index | ||
33 | + //static void indices(size_t& r, size_t& c, size_t idx) { | ||
34 | + // size_t col = 0; | ||
35 | + // for ( ; col < D; col++) | ||
36 | + // if(idx <= ((D - col + D) * (col + 1)/2 - 1)) | ||
37 | + // break; | ||
38 | + | ||
39 | + // c = col; | ||
40 | + // size_t ci = (D - (col - 1) + D) * col / 2 - 1; //index to the end of last column col -1 | ||
41 | + // r = idx - ci + c - 1; | ||
42 | + //} | ||
43 | + static void indices(size_t& r, size_t& c, size_t idx) { | ||
44 | + size_t cf = -1/2 * sqrt(4 * D * D + 4 * D - (7 + 8 * idx)) + D - 1/2; | ||
45 | + c = ceil(cf); | ||
46 | + r = idx - D * c + c * (c + 1) / 2; | ||
47 | + } | ||
48 | + | ||
49 | +public: | ||
50 | + //return the symmetric matrix associated with this tensor | ||
51 | + stim::matrix<T, D> mat() { | ||
52 | + stim::matrix<T, D> r; | ||
53 | + r.setsym(M); | ||
54 | + return r; | ||
55 | + } | ||
56 | + | ||
57 | + CUDA_CALLABLE T& operator()(int r, int c) { | ||
58 | + return M[idx(r, c)]; | ||
59 | + } | ||
60 | + | ||
61 | + CUDA_CALLABLE matrix_sym<T, D> operator=(T rhs) { | ||
62 | + int Nsq = D*(D+1)/2; | ||
63 | + for(int i=0; i<Nsq; i++) | ||
64 | + M[i] = rhs; | ||
65 | + | ||
66 | + return *this; | ||
67 | + } | ||
68 | + | ||
69 | + CUDA_CALLABLE matrix_sym<T, D> operator=(matrix_sym<T, D> rhs) { | ||
70 | + size_t N = D * (D + 1) / 2; | ||
71 | + for (size_t i = 0; i < N; i++) M[i] = rhs.M[i]; | ||
72 | + return *this; | ||
73 | + } | ||
74 | + | ||
75 | + CUDA_CALLABLE T trace() { | ||
76 | + T tr = 0; | ||
77 | + for (size_t i = 0; i < D; i++) //for each diagonal value | ||
78 | + tr += M[idx(i, i)]; //add the value on the diagonal | ||
79 | + return tr; | ||
80 | + } | ||
81 | + // overload matrix multiply scalar | ||
82 | + CUDA_CALLABLE void operator_product(matrix_sym<T, D> &B, T rhs) { | ||
83 | + int Nsq = D*(D+1)/2; | ||
84 | + for(int i=0; i<Nsq; i++) | ||
85 | + B.M[i] *= rhs; | ||
86 | + } | ||
87 | + | ||
88 | + //return the tensor as a string | ||
89 | + std::string str() { | ||
90 | + std::stringstream ss; | ||
91 | + for(int r = 0; r < D; r++){ | ||
92 | + ss << "| "; | ||
93 | + for(int c=0; c<D; c++) | ||
94 | + { | ||
95 | + ss << (*this)(r, c) << " "; | ||
96 | + } | ||
97 | + ss << "|" << std::endl; | ||
98 | + } | ||
99 | + | ||
100 | + return ss.str(); | ||
101 | + } | ||
102 | + | ||
103 | + //returns an identity matrix | ||
104 | + static matrix_sym<T, D> identity() { | ||
105 | + matrix_sym<T, D> I; | ||
106 | + I = 0; | ||
107 | + for (size_t i = 0; i < D; i++) | ||
108 | + I.M[matrix_sym<T, D>::idx(i, i)] = 1; | ||
109 | + return I; | ||
110 | + } | ||
111 | +}; | ||
112 | + | ||
113 | + | ||
114 | + | ||
115 | +} //end namespace stim | ||
116 | + | ||
117 | + | ||
118 | +#endif |
1 | +#ifndef STIM_TENSOR2_H | ||
2 | +#define STIM_TENSOR2_H | ||
3 | + | ||
4 | +#include "matrix_sym.h" | ||
5 | + | ||
6 | +namespace stim { | ||
7 | + | ||
8 | +/*This class represents a symmetric rank-2 2D tensor, useful for structure tensors | ||
9 | +*/ | ||
10 | +template<typename T> | ||
11 | +class tensor2 : public matrix_sym<T, 2> { | ||
12 | + | ||
13 | +protected: | ||
14 | + | ||
15 | +public: | ||
16 | + | ||
17 | + //calculate the eigenvectors and eigenvalues of the tensor | ||
18 | + CUDA_CALLABLE void eig(stim::matrix<T, 2>& v, stim::matrix<T, 2>& lambda) { | ||
19 | + | ||
20 | + lambda = 0; //initialize the eigenvalue matrix to zero | ||
21 | + | ||
22 | + T t = M[0] + M[2]; //calculate the trace of the tensor | ||
23 | + T d = M[0] * M[2] - M[1] * M[1]; //calculate the determinant of the tensor | ||
24 | + | ||
25 | + lambda(0, 0) = t / 2 + sqrt(t*t / 4 - d); | ||
26 | + lambda(1, 1) = t / 2 - sqrt(t*t / 4 - d); | ||
27 | + | ||
28 | + if (M[1] == 0) { | ||
29 | + v = stim::matrix<T, 2>::identity(); | ||
30 | + } | ||
31 | + else { | ||
32 | + v(0, 0) = lambda(0, 0) - d; | ||
33 | + v(0, 1) = lambda(1, 1) - d; | ||
34 | + v(1, 0) = v(1, 1) = M[1]; | ||
35 | + } | ||
36 | + } | ||
37 | + | ||
38 | + CUDA_CALLABLE tensor2<T> operator=(stim::matrix_sym<T, 2> rhs){ | ||
39 | + stim::matrix_sym<T, 2>::operator=(rhs); | ||
40 | + return *this; | ||
41 | + } | ||
42 | +}; | ||
43 | + | ||
44 | + | ||
45 | +} //end namespace stim | ||
46 | + | ||
47 | + | ||
48 | +#endif | ||
0 | \ No newline at end of file | 49 | \ No newline at end of file |
1 | +#ifndef STIM_TENSOR3_H | ||
2 | +#define STIM_TENSOR3_H | ||
3 | + | ||
4 | +#include "matrix_sym.h" | ||
5 | +#include <stim/math/constants.h> | ||
6 | + | ||
7 | +namespace stim { | ||
8 | + | ||
	/*This class represents a symmetric rank-2 3D tensor, useful for structure tensors
	*/
11 | + | ||
12 | + //Matrix ID cheat sheet | ||
13 | + // | 0 1 2 | | ||
14 | + // | 1 3 4 | | ||
15 | + // | 2 4 5 | | ||
16 | + template<typename T> | ||
17 | + class tensor3 : public matrix_sym<T, 3> { | ||
18 | + | ||
19 | + protected: | ||
20 | + | ||
21 | + public: | ||
22 | + | ||
23 | + //calculates the determinant of the tensor | ||
24 | + CUDA_CALLABLE T det() { | ||
25 | + return M[0] * M[3] * M[5] + 2 * (M[1] * M[4] * M[2]) - M[2] * M[3] * M[2] - M[1] * M[1] * M[5] - M[0] * M[4] * M[4]; | ||
26 | + } | ||
27 | + | ||
28 | + //calculate the eigenvalues for the tensor | ||
29 | + //adapted from https://en.wikipedia.org/wiki/Eigenvalue_algorithm | ||
30 | + | ||
31 | + CUDA_CALLABLE stim::vec3<T> lambda() { | ||
32 | + stim::vec3<T> lam; | ||
33 | + T p1 = M[1] * M[1] + M[2] * M[2] + M[4] * M[4]; //calculate the sum of the squared off-diagonal values | ||
34 | + if (p1 == 0) { //if this value is zero, the matrix is diagonal | ||
35 | + lam[0] = M[0]; //the eigenvalues are the diagonal values | ||
36 | + lam[1] = M[3]; | ||
37 | + lam[2] = M[5]; | ||
38 | + return lam; //return the eigenvalue vector | ||
39 | + } | ||
40 | + | ||
41 | + T tr = matrix_sym<T, 3>::trace(); //calculate the trace of the matrix | ||
42 | + T q = tr / 3; | ||
43 | + T p2 = (M[0] - q) * (M[0] - q) + (M[3] - q) * (M[3] - q) + (M[5] - q) * (M[5] - q) + 2 * p1; | ||
44 | + T p = sqrt(p2 / 6); | ||
45 | + tensor3<T> Q; //allocate space for Q (q along the diagonals) | ||
46 | + Q = (T)0; //initialize Q to zeros | ||
47 | + Q(0, 0) = Q(1, 1) = Q(2, 2) = q; //set the diagonal values to q | ||
48 | + tensor3<T> B = *this; // B1 = A | ||
49 | + B.M[0] = (B.M[0] - q); | ||
50 | + B.M[3] = (B.M[3] - q); | ||
51 | + B.M[5] = (B.M[5] - q); | ||
52 | + matrix_sym<T, 3>::operator_product(B, 1/p); // B = (1/p) * (A - q*I) | ||
53 | + //B.M[0] = B.M[0] * 1/p; | ||
54 | + //B.M[1] = B.M[1] * 1/p; | ||
55 | + //B.M[2] = B.M[2] * 1/p; | ||
56 | + //B.M[3] = B.M[3] * 1/p; | ||
57 | + //B.M[4] = B.M[4] * 1/p; | ||
58 | + //B.M[5] = B.M[5] * 1/p; | ||
59 | + T r = B.det() / 2; //calculate det(B) / 2 | ||
60 | + | ||
61 | + // In exact arithmetic for a symmetric matrix - 1 <= r <= 1 | ||
62 | + // but computation error can leave it slightly outside this range. | ||
63 | + T phi; | ||
64 | + if (r <= -1) phi = stim::PI / 3; | ||
65 | + else if (r >= 1) phi = 0; | ||
66 | + else phi = acos(r) / 3; | ||
67 | + | ||
68 | + // the eigenvalues satisfy eig3 >= eig2 >= eig1 | ||
69 | + lam[2] = q + 2 * p * cos(phi); | ||
70 | + lam[0] = q + 2 * p * cos(phi + (2 * stim::PI / 3)); | ||
71 | + lam[1] = 3 * q - (lam[2] + lam[0]); | ||
72 | + | ||
73 | + return lam; | ||
74 | + } | ||
75 | + | ||
76 | + CUDA_CALLABLE stim::matrix<T, 3> eig(stim::vec3<T>& lambda = stim::vec3<T>()) { | ||
77 | + stim::matrix<T, 3> V; | ||
78 | + | ||
79 | + stim::matrix<T, 3> M1 = matrix_sym<T, 3>::mat(); | ||
80 | + stim::matrix<T, 3> M2 = matrix_sym<T, 3>::mat(); | ||
81 | + stim::matrix<T, 3> M3 = matrix_sym<T, 3>::mat(); // fill a tensor with symmetric values | ||
82 | + | ||
83 | + M1.operator_minus(M1, lambda[0]); // M1 = A - lambda[0] * I | ||
84 | + | ||
85 | + M2.operator_minus(M2, lambda[1]); // M2 = A - lambda[1] * I | ||
86 | + | ||
87 | + M3.operator_minus(M3, lambda[2]); // M3 = A - lambda[2] * I | ||
88 | + | ||
89 | + T Mod = 0; // module of one column | ||
90 | + | ||
91 | + T tmp1[9] = {0}; | ||
92 | + for(int i = 0; i < 9; i++) { | ||
93 | + for(int j = 0; j < 3; j++){ | ||
94 | + tmp1[i] += M2(i%3, j) * M3(j, i/3); | ||
95 | + } | ||
96 | + } | ||
97 | + if(tmp1[0] * tmp1[1] * tmp1[2] != 0) { // test whether it is zero column | ||
98 | + Mod = sqrt(pow(tmp1[0],2) + pow(tmp1[1],2) + pow(tmp1[2],2)); | ||
99 | + V(0, 0) = tmp1[0]/Mod; | ||
100 | + V(1, 0) = tmp1[1]/Mod; | ||
101 | + V(2, 0) = tmp1[2]/Mod; | ||
102 | + } | ||
103 | + else { | ||
104 | + Mod = sqrt(pow(tmp1[3],2) + pow(tmp1[4],2) + pow(tmp1[5],2)); | ||
105 | + V(0, 0) = tmp1[3]/Mod; | ||
106 | + V(1, 0) = tmp1[4]/Mod; | ||
107 | + V(2, 0) = tmp1[5]/Mod; | ||
108 | + } | ||
109 | + | ||
110 | + T tmp2[9] = {0}; | ||
111 | + for(int i = 0; i < 9; i++) { | ||
112 | + for(int j = 0; j < 3; j++){ | ||
113 | + tmp2[i] += M1(i%3, j) * M3(j, i/3); | ||
114 | + } | ||
115 | + } | ||
116 | + if(tmp2[0] * tmp2[1] * tmp2[2] != 0) { | ||
117 | + Mod = sqrt(pow(tmp2[0],2) + pow(tmp2[1],2) + pow(tmp2[2],2)); | ||
118 | + V(0, 1) = tmp2[0]/Mod; | ||
119 | + V(1, 1) = tmp2[1]/Mod; | ||
120 | + V(2, 1) = tmp2[2]/Mod; | ||
121 | + } | ||
122 | + else { | ||
123 | + Mod = sqrt(pow(tmp2[3],2) + pow(tmp2[4],2) + pow(tmp2[5],2)); | ||
124 | + V(0, 1) = tmp2[3]/Mod; | ||
125 | + V(1, 1) = tmp2[4]/Mod; | ||
126 | + V(2, 1) = tmp2[5]/Mod; | ||
127 | + } | ||
128 | + | ||
129 | + T tmp3[9] = {0}; | ||
130 | + for(int i = 0; i < 9; i++) { | ||
131 | + for(int j = 0; j < 3; j++){ | ||
132 | + tmp3[i] += M1(i%3, j) * M2(j, i/3); | ||
133 | + } | ||
134 | + } | ||
135 | + if(tmp3[0] * tmp3[1] * tmp3[2] != 0) { | ||
136 | + Mod = sqrt(pow(tmp3[0],2) + pow(tmp3[1],2) + pow(tmp3[2],2)); | ||
137 | + V(0, 2) = tmp3[0]/Mod; | ||
138 | + V(1, 2) = tmp3[1]/Mod; | ||
139 | + V(2, 2) = tmp3[2]/Mod; | ||
140 | + } | ||
141 | + else { | ||
142 | + Mod = sqrt(pow(tmp3[3],2) + pow(tmp3[4],2) + pow(tmp3[5],2)); | ||
143 | + V(0, 2) = tmp3[3]/Mod; | ||
144 | + V(1, 2) = tmp3[4]/Mod; | ||
145 | + V(2, 2) = tmp3[5]/Mod; | ||
146 | + } | ||
147 | + return V; //return the eigenvector matrix | ||
148 | + } | ||
149 | + // return one specific eigenvector | ||
150 | + CUDA_CALLABLE stim::vec3<T> eig(int n, stim::vec3<T>& lambda = stim::vec3<T>()) { | ||
151 | + stim::matrix<T, 3> V = eig(lambda); | ||
152 | + stim::vec3<T> v; | ||
153 | + for(int i = 0; i < 3; i++) | ||
154 | + v[i] = V(i, n); | ||
155 | + return v; | ||
156 | + } | ||
157 | + | ||
158 | + | ||
159 | + CUDA_CALLABLE T linear(stim::vec3<T>& lambda = stim::vec3<T>()) { | ||
160 | + T cl = (lambda[2] - lambda[1]) / (lambda[0] + lambda[1] + lambda[2]); | ||
161 | + return cl; | ||
162 | + } | ||
163 | + | ||
164 | + CUDA_CALLABLE T Planar(stim::vec3<T>& lambda = stim::vec3<T>()) { | ||
165 | + T cp = 2 * (lambda[1] - lambda[0]) / (lambda[0] + lambda[1] + lambda[2]); | ||
166 | + return cp; | ||
167 | + } | ||
168 | + | ||
169 | + CUDA_CALLABLE T spherical(stim::vec3<T>& lambda = stim::vec3<T>()) { | ||
170 | + T cs = 3 * lambda[0] / (lambda[0] + lambda[1] + lambda[2]); | ||
171 | + return cs; | ||
172 | + } | ||
173 | + | ||
174 | + CUDA_CALLABLE T fa(stim::vec3<T>& lambda = stim::vec3<T>()) { | ||
175 | + T fa = sqrt(1/2) * sqrt(pow(lambda[2] - lambda[1], 2) + pow(lambda[1] - lambda[0], 2) + pow(lambda[0] - lambda[2], 2)) / sqrt(pow(lambda[2], 2) + pow(lambda[1], 2) + pow(lambda[0], 2)); | ||
176 | + } | ||
177 | + //JACK 2: write functions to calculate anisotropy | ||
178 | + //ex: fa(), linear(), planar(), spherical() | ||
179 | + | ||
180 | + | ||
181 | + //calculate the eigenvectors and eigenvalues of the tensor | ||
182 | + //CUDA_CALLABLE void eig(stim::matrix<T, 3>& v, stim::matrix<T, 3>& lambda){ | ||
183 | + | ||
184 | + //} | ||
185 | + CUDA_CALLABLE tensor3<T> operator=(T rhs) { | ||
186 | + stim::matrix_sym<T, 3>::operator=(rhs); | ||
187 | + return *this; | ||
188 | + } | ||
189 | + | ||
190 | + CUDA_CALLABLE tensor3<T> operator=(stim::matrix_sym<T, 3> rhs) { | ||
191 | + stim::matrix_sym<T, 3>::operator=(rhs); | ||
192 | + return *this; | ||
193 | + } | ||
194 | + }; | ||
195 | + | ||
196 | + | ||
197 | +} //end namespace stim | ||
198 | + | ||
199 | + | ||
200 | +#endif | ||
0 | \ No newline at end of file | 201 | \ No newline at end of file |
1 | +#ifndef STIM_VEC3_H | ||
2 | +#define STIM_VEC3_H | ||
3 | + | ||
4 | + | ||
#include <stim/cuda/cudatools/callable.h>
#include <cmath>
#include <sstream>
#include <string>
7 | + | ||
8 | + | ||
9 | +namespace stim{ | ||
10 | + | ||
11 | + | ||
/// A class designed to act as a 3D vector with CUDA compatibility.
/// The three components are stored in a fixed-size array of type T; all arithmetic
/// members are marked CUDA_CALLABLE.
template<typename T>
class vec3{

protected:
	T ptr[3];				//the three components of the vector

public:

	/// Default constructor (the components are left uninitialized)
	CUDA_CALLABLE vec3(){}

	/// Creates a vector with all three components set to v
	CUDA_CALLABLE vec3(T v){
		ptr[0] = ptr[1] = ptr[2] = v;
	}

	/// Creates a vector from three individual components
	CUDA_CALLABLE vec3(T x, T y, T z){
		ptr[0] = x;
		ptr[1] = y;
		ptr[2] = z;
	}

	//copy constructor
	CUDA_CALLABLE vec3( const vec3<T>& other){
		ptr[0] = other.ptr[0];
		ptr[1] = other.ptr[1];
		ptr[2] = other.ptr[2];
	}

	//access an element using an index (no bounds checking)
	CUDA_CALLABLE T& operator[](size_t idx){
		return ptr[idx];
	}

	//read-only access to an element, usable on const vectors (no bounds checking)
	CUDA_CALLABLE const T& operator[](size_t idx) const{
		return ptr[idx];
	}

	/// Returns a pointer to the first of the three components
	CUDA_CALLABLE T* data(){
		return ptr;
	}

/// Casting operator. Creates a new vector with a new type U.
	template< typename U >
	CUDA_CALLABLE operator vec3<U>(){
		//vec3<U>::ptr is protected and not accessible from vec3<T>, so the public constructor is used
		return vec3<U>((U)ptr[0], (U)ptr[1], (U)ptr[2]);
	}

	// computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
	CUDA_CALLABLE T len_sq() const{
		return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
	}

	/// computes the Euclidean length of the vector
	CUDA_CALLABLE T len() const{
		return sqrt(len_sq());
	}


	/// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi),
	/// where theta = atan2(y, x) and phi = acos(z / r); phi is set to 0 for a zero-length vector
	CUDA_CALLABLE vec3<T> cart2sph() const{
		vec3<T> sph;
		sph.ptr[0] = len();
		sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
		if(sph.ptr[0] == 0)
			sph.ptr[2] = 0;
		else
			sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
		return sph;
	}

	/// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z)
	CUDA_CALLABLE vec3<T> sph2cart() const{
		vec3<T> cart;
		cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

		return cart;
	}

	/// Computes the normalized vector (where each coordinate is divided by the L2 norm).
	/// The length is not checked, so a zero-length vector results in a division by zero.
	CUDA_CALLABLE vec3<T> norm() const{
		return (*this) / len();
	}

	/// Computes the cross product of a 3-dimensional vector
	CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

		vec3<T> result;

		result.ptr[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
		result.ptr[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
		result.ptr[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

		return result;
	}

	/// Compute the Euclidean inner (dot) product
	CUDA_CALLABLE T dot(vec3<T> rhs) const{
		return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
	}

	/// Arithmetic addition operator

	/// @param rhs is the right-hand-side operator for the addition
	CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
		return vec3<T>(ptr[0] + rhs.ptr[0], ptr[1] + rhs.ptr[1], ptr[2] + rhs.ptr[2]);
	}

	/// Arithmetic addition to a scalar

	/// @param rhs is the scalar added to every component
	CUDA_CALLABLE vec3<T> operator+(T rhs) const{
		return vec3<T>(ptr[0] + rhs, ptr[1] + rhs, ptr[2] + rhs);
	}

	/// Arithmetic subtraction operator

	/// @param rhs is the right-hand-side operator for the subtraction
	CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
		return vec3<T>(ptr[0] - rhs.ptr[0], ptr[1] - rhs.ptr[1], ptr[2] - rhs.ptr[2]);
	}

	/// Arithmetic subtraction of a scalar

	/// @param rhs is the scalar subtracted from every component
	CUDA_CALLABLE vec3<T> operator-(T rhs) const{
		return vec3<T>(ptr[0] - rhs, ptr[1] - rhs, ptr[2] - rhs);
	}

	/// Arithmetic scalar multiplication operator

	/// @param rhs is the scalar every component is multiplied by
	CUDA_CALLABLE vec3<T> operator*(T rhs) const{
		return vec3<T>(ptr[0] * rhs, ptr[1] * rhs, ptr[2] * rhs);
	}

	/// Arithmetic scalar division operator (implemented as a multiplication by the reciprocal)

	/// @param rhs is the scalar every component is divided by
	/// NOTE: for integral T the reciprocal (T)1.0/rhs truncates, so this is only meaningful for floating point types
	CUDA_CALLABLE vec3<T> operator/(T rhs) const{
		return (*this) * ((T)1.0/rhs);
	}

	/// Multiplication by a scalar, followed by assignment
	CUDA_CALLABLE vec3<T> operator*=(T rhs){
		ptr[0] = ptr[0] * rhs;
		ptr[1] = ptr[1] * rhs;
		ptr[2] = ptr[2] * rhs;
		return *this;
	}

	/// Addition and assignment (component-wise)
	CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
		ptr[0] = ptr[0] + rhs.ptr[0];
		ptr[1] = ptr[1] + rhs.ptr[1];
		ptr[2] = ptr[2] + rhs.ptr[2];
		return *this;
	}

	/// Assign a scalar to all values
	CUDA_CALLABLE vec3<T> & operator=(T rhs){
		ptr[0] = rhs;
		ptr[1] = rhs;
		ptr[2] = rhs;
		return *this;
	}

	/// Casting and assignment
	template<typename Y>
	CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
		//vec3<Y>::ptr is protected and not accessible from vec3<T>, so the public index operator is used
		ptr[0] = (T)rhs[0];
		ptr[1] = (T)rhs[1];
		ptr[2] = (T)rhs[2];
		return *this;
	}

	/// Unary minus (returns the negative of the vector)
	CUDA_CALLABLE vec3<T> operator-() const{
		return vec3<T>(-ptr[0], -ptr[1], -ptr[2]);
	}

	/// Outputs the vector as a string of the form "[x, y, z]"
	std::string str() const{
		std::stringstream ss;

		const size_t N = 3;

		ss<<"[";
		for(size_t i=0; i<N; i++)
		{
			ss<<ptr[i];
			if(i != N-1)
				ss<<", ";
		}
		ss<<"]";

		return ss.str();
	}

	/// Returns the number of components (always 3)
	size_t size() const{ return 3; }

	};										//end class vec3
249 | +} //end namespace stim | ||
250 | + | ||
251 | +/// Multiply a vector by a constant when the vector is on the right hand side | ||
252 | +template <typename T> | ||
253 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | ||
254 | + return rhs * lhs; | ||
255 | +} | ||
256 | + | ||
257 | +//stream operator | ||
258 | +template<typename T> | ||
259 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | ||
260 | + os<<rhs.str(); | ||
261 | + return os; | ||
262 | +} | ||
263 | + | ||
264 | +#endif |
1 | +#ifndef STIM_VEC3_H | ||
2 | +#define STIM_VEC3_H | ||
3 | + | ||
4 | + | ||
5 | +#include <stim/cuda/cudatools/callable.h> | ||
6 | +#include <cmath> | ||
7 | + | ||
8 | + | ||
9 | +namespace stim{ | ||
10 | + | ||
11 | + | ||
/// A class designed to act as a 3D vector with CUDA compatibility.
/// The three components are stored in a fixed-size array of type T; all arithmetic
/// members are marked CUDA_CALLABLE.
template<typename T>
class vec3{

protected:
	T ptr[3];				//the three components of the vector

public:

	/// Default constructor (the components are left uninitialized)
	CUDA_CALLABLE vec3(){}

	/// Creates a vector with all three components set to v
	CUDA_CALLABLE vec3(T v){
		ptr[0] = ptr[1] = ptr[2] = v;
	}

	/// Creates a vector from three individual components
	CUDA_CALLABLE vec3(T x, T y, T z){
		ptr[0] = x;
		ptr[1] = y;
		ptr[2] = z;
	}

	//copy constructor
	CUDA_CALLABLE vec3( const vec3<T>& other){
		ptr[0] = other.ptr[0];
		ptr[1] = other.ptr[1];
		ptr[2] = other.ptr[2];
	}

	//access an element using an index (no bounds checking)
	CUDA_CALLABLE T& operator[](size_t idx){
		return ptr[idx];
	}

	//read-only access to an element, usable on const vectors (no bounds checking)
	CUDA_CALLABLE const T& operator[](size_t idx) const{
		return ptr[idx];
	}

	/// Returns a pointer to the first of the three components
	CUDA_CALLABLE T* data(){
		return ptr;
	}

/// Casting operator. Creates a new vector with a new type U.
	template< typename U >
	CUDA_CALLABLE operator vec3<U>(){
		//vec3<U>::ptr is protected and not accessible from vec3<T>, so the public constructor is used
		return vec3<U>((U)ptr[0], (U)ptr[1], (U)ptr[2]);
	}

	// computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
	CUDA_CALLABLE T len_sq() const{
		return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
	}

	/// computes the Euclidean length of the vector
	CUDA_CALLABLE T len() const{
		return sqrt(len_sq());
	}


	/// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi),
	/// where theta = atan2(y, x) and phi = acos(z / r); phi is set to 0 for a zero-length vector
	CUDA_CALLABLE vec3<T> cart2sph() const{
		vec3<T> sph;
		sph.ptr[0] = len();
		sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
		if(sph.ptr[0] == 0)
			sph.ptr[2] = 0;
		else
			sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
		return sph;
	}

	/// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z)
	CUDA_CALLABLE vec3<T> sph2cart() const{
		vec3<T> cart;
		cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

		return cart;
	}

	/// Computes the normalized vector (where each coordinate is divided by the L2 norm).
	/// The length is not checked, so a zero-length vector results in a division by zero.
	CUDA_CALLABLE vec3<T> norm() const{
		return (*this) / len();
	}

	/// Computes the cross product of a 3-dimensional vector
	CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

		vec3<T> result;

		result.ptr[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
		result.ptr[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
		result.ptr[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

		return result;
	}

	/// Compute the Euclidean inner (dot) product
	CUDA_CALLABLE T dot(vec3<T> rhs) const{
		return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
	}

	/// Arithmetic addition operator

	/// @param rhs is the right-hand-side operator for the addition
	CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
		return vec3<T>(ptr[0] + rhs.ptr[0], ptr[1] + rhs.ptr[1], ptr[2] + rhs.ptr[2]);
	}

	/// Arithmetic addition to a scalar

	/// @param rhs is the scalar added to every component
	CUDA_CALLABLE vec3<T> operator+(T rhs) const{
		return vec3<T>(ptr[0] + rhs, ptr[1] + rhs, ptr[2] + rhs);
	}

	/// Arithmetic subtraction operator

	/// @param rhs is the right-hand-side operator for the subtraction
	CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
		return vec3<T>(ptr[0] - rhs.ptr[0], ptr[1] - rhs.ptr[1], ptr[2] - rhs.ptr[2]);
	}

	/// Arithmetic subtraction of a scalar

	/// @param rhs is the scalar subtracted from every component
	CUDA_CALLABLE vec3<T> operator-(T rhs) const{
		return vec3<T>(ptr[0] - rhs, ptr[1] - rhs, ptr[2] - rhs);
	}

	/// Arithmetic scalar multiplication operator

	/// @param rhs is the scalar every component is multiplied by
	CUDA_CALLABLE vec3<T> operator*(T rhs) const{
		return vec3<T>(ptr[0] * rhs, ptr[1] * rhs, ptr[2] * rhs);
	}

	/// Arithmetic scalar division operator (implemented as a multiplication by the reciprocal)

	/// @param rhs is the scalar every component is divided by
	/// NOTE: for integral T the reciprocal (T)1.0/rhs truncates, so this is only meaningful for floating point types
	CUDA_CALLABLE vec3<T> operator/(T rhs) const{
		return (*this) * ((T)1.0/rhs);
	}

	/// Multiplication by a scalar, followed by assignment
	CUDA_CALLABLE vec3<T> operator*=(T rhs){
		ptr[0] = ptr[0] * rhs;
		ptr[1] = ptr[1] * rhs;
		ptr[2] = ptr[2] * rhs;
		return *this;
	}

	/// Addition and assignment (component-wise)
	CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
		ptr[0] = ptr[0] + rhs.ptr[0];
		ptr[1] = ptr[1] + rhs.ptr[1];
		ptr[2] = ptr[2] + rhs.ptr[2];
		return *this;
	}

	/// Assign a scalar to all values
	CUDA_CALLABLE vec3<T> & operator=(T rhs){
		ptr[0] = rhs;
		ptr[1] = rhs;
		ptr[2] = rhs;
		return *this;
	}

	/// Casting and assignment
	template<typename Y>
	CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
		//vec3<Y>::ptr is protected and not accessible from vec3<T>, so the public index operator is used
		ptr[0] = (T)rhs[0];
		ptr[1] = (T)rhs[1];
		ptr[2] = (T)rhs[2];
		return *this;
	}

	/// Unary minus (returns the negative of the vector)
	CUDA_CALLABLE vec3<T> operator-() const{
		return vec3<T>(-ptr[0], -ptr[1], -ptr[2]);
	}

	/// Outputs the vector as a string of the form "[x, y, z]"
	std::string str() const{
		std::stringstream ss;

		const size_t N = 3;

		ss<<"[";
		for(size_t i=0; i<N; i++)
		{
			ss<<ptr[i];
			if(i != N-1)
				ss<<", ";
		}
		ss<<"]";

		return ss.str();
	}

	/// Returns the number of components (always 3)
	size_t size() const{ return 3; }

	};										//end class vec3
249 | +} //end namespace stim | ||
250 | + | ||
251 | +/// Multiply a vector by a constant when the vector is on the right hand side | ||
252 | +template <typename T> | ||
253 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | ||
254 | + return rhs * lhs; | ||
255 | +} | ||
256 | + | ||
257 | +//stream operator | ||
258 | +template<typename T> | ||
259 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | ||
260 | + os<<rhs.str(); | ||
261 | + return os; | ||
262 | +} | ||
263 | + | ||
264 | +#endif |
1 | +#ifndef STIM_VEC3_H | ||
2 | +#define STIM_VEC3_H | ||
3 | + | ||
4 | + | ||
5 | +#include <stim/cuda/cudatools/callable.h> | ||
6 | + | ||
7 | + | ||
8 | +namespace stim{ | ||
9 | + | ||
10 | + | ||
/// A class designed to act as a 3D vector with CUDA compatibility.
/// The three components are stored in a fixed-size array of type T; all arithmetic
/// members are marked CUDA_CALLABLE.
template<typename T>
class vec3{

protected:
	T ptr[3];				//the three components of the vector

public:

	/// Default constructor (the components are left uninitialized)
	CUDA_CALLABLE vec3(){}

	/// Creates a vector with all three components set to v
	CUDA_CALLABLE vec3(T v){
		ptr[0] = ptr[1] = ptr[2] = v;
	}

	/// Creates a vector from three individual components
	CUDA_CALLABLE vec3(T x, T y, T z){
		ptr[0] = x;
		ptr[1] = y;
		ptr[2] = z;
	}

	//copy constructor
	CUDA_CALLABLE vec3( const vec3<T>& other){
		ptr[0] = other.ptr[0];
		ptr[1] = other.ptr[1];
		ptr[2] = other.ptr[2];
	}

	//access an element using an index (no bounds checking)
	CUDA_CALLABLE T& operator[](size_t idx){
		return ptr[idx];
	}

	//read-only access to an element, usable on const vectors (no bounds checking)
	CUDA_CALLABLE const T& operator[](size_t idx) const{
		return ptr[idx];
	}

	/// Returns a pointer to the first of the three components
	CUDA_CALLABLE T* data(){
		return ptr;
	}

/// Casting operator. Creates a new vector with a new type U.
	template< typename U >
	CUDA_CALLABLE operator vec3<U>(){
		//vec3<U>::ptr is protected and not accessible from vec3<T>, so the public constructor is used
		return vec3<U>((U)ptr[0], (U)ptr[1], (U)ptr[2]);
	}

	// computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
	CUDA_CALLABLE T len_sq() const{
		return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
	}

	/// computes the Euclidean length of the vector
	CUDA_CALLABLE T len() const{
		return sqrt(len_sq());
	}


	/// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi),
	/// where theta = atan2(y, x) and phi = acos(z / r); phi is set to 0 for a zero-length vector
	CUDA_CALLABLE vec3<T> cart2sph() const{
		vec3<T> sph;
		sph.ptr[0] = len();
		sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
		if(sph.ptr[0] == 0)
			sph.ptr[2] = 0;
		else
			sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
		return sph;
	}

	/// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z)
	CUDA_CALLABLE vec3<T> sph2cart() const{
		vec3<T> cart;
		cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

		return cart;
	}

	/// Computes the normalized vector (where each coordinate is divided by the L2 norm).
	/// The length is not checked, so a zero-length vector results in a division by zero.
	CUDA_CALLABLE vec3<T> norm() const{
		return (*this) / len();
	}

	/// Computes the cross product of a 3-dimensional vector
	CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

		vec3<T> result;

		result.ptr[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
		result.ptr[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
		result.ptr[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

		return result;
	}

	/// Compute the Euclidean inner (dot) product
	CUDA_CALLABLE T dot(vec3<T> rhs) const{
		return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
	}

	/// Arithmetic addition operator

	/// @param rhs is the right-hand-side operator for the addition
	CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
		return vec3<T>(ptr[0] + rhs.ptr[0], ptr[1] + rhs.ptr[1], ptr[2] + rhs.ptr[2]);
	}

	/// Arithmetic addition to a scalar

	/// @param rhs is the scalar added to every component
	CUDA_CALLABLE vec3<T> operator+(T rhs) const{
		return vec3<T>(ptr[0] + rhs, ptr[1] + rhs, ptr[2] + rhs);
	}

	/// Arithmetic subtraction operator

	/// @param rhs is the right-hand-side operator for the subtraction
	CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
		return vec3<T>(ptr[0] - rhs.ptr[0], ptr[1] - rhs.ptr[1], ptr[2] - rhs.ptr[2]);
	}

	/// Arithmetic subtraction of a scalar

	/// @param rhs is the scalar subtracted from every component
	CUDA_CALLABLE vec3<T> operator-(T rhs) const{
		return vec3<T>(ptr[0] - rhs, ptr[1] - rhs, ptr[2] - rhs);
	}

	/// Arithmetic scalar multiplication operator

	/// @param rhs is the scalar every component is multiplied by
	CUDA_CALLABLE vec3<T> operator*(T rhs) const{
		return vec3<T>(ptr[0] * rhs, ptr[1] * rhs, ptr[2] * rhs);
	}

	/// Arithmetic scalar division operator (implemented as a multiplication by the reciprocal)

	/// @param rhs is the scalar every component is divided by
	/// NOTE: for integral T the reciprocal (T)1.0/rhs truncates, so this is only meaningful for floating point types
	CUDA_CALLABLE vec3<T> operator/(T rhs) const{
		return (*this) * ((T)1.0/rhs);
	}

	/// Multiplication by a scalar, followed by assignment
	CUDA_CALLABLE vec3<T> operator*=(T rhs){
		ptr[0] = ptr[0] * rhs;
		ptr[1] = ptr[1] * rhs;
		ptr[2] = ptr[2] * rhs;
		return *this;
	}

	/// Addition and assignment (component-wise)
	CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
		ptr[0] = ptr[0] + rhs.ptr[0];
		ptr[1] = ptr[1] + rhs.ptr[1];
		ptr[2] = ptr[2] + rhs.ptr[2];
		return *this;
	}

	/// Assign a scalar to all values
	CUDA_CALLABLE vec3<T> & operator=(T rhs){
		ptr[0] = rhs;
		ptr[1] = rhs;
		ptr[2] = rhs;
		return *this;
	}

	/// Casting and assignment
	template<typename Y>
	CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
		//vec3<Y>::ptr is protected and not accessible from vec3<T>, so the public index operator is used
		ptr[0] = (T)rhs[0];
		ptr[1] = (T)rhs[1];
		ptr[2] = (T)rhs[2];
		return *this;
	}

	/// Unary minus (returns the negative of the vector)
	CUDA_CALLABLE vec3<T> operator-() const{
		return vec3<T>(-ptr[0], -ptr[1], -ptr[2]);
	}

#ifndef __NVCC__
	/// Outputs the vector as a string of the form "[x, y, z]" (excluded from NVCC builds)
	std::string str() const{
		std::stringstream ss;

		const size_t N = 3;

		ss<<"[";
		for(size_t i=0; i<N; i++)
		{
			ss<<ptr[i];
			if(i != N-1)
				ss<<", ";
		}
		ss<<"]";

		return ss.str();
	}
#endif

	/// Returns the number of components (always 3)
	size_t size() const{ return 3; }

	};										//end class vec3
242 | +} //end namespace stim | ||
243 | + | ||
244 | +/// Multiply a vector by a constant when the vector is on the right hand side | ||
245 | +template <typename T> | ||
246 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | ||
247 | + return rhs * lhs; | ||
248 | +} | ||
249 | + | ||
250 | +//stream operator | ||
251 | +template<typename T> | ||
252 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | ||
253 | + os<<rhs.str(); | ||
254 | + return os; | ||
255 | +} | ||
256 | + | ||
257 | +#endif |
1 | +#ifndef STIM_VEC3_H | ||
2 | +#define STIM_VEC3_H | ||
3 | + | ||
4 | + | ||
5 | +#include <stim/cuda/cudatools/callable.h> | ||
6 | + | ||
7 | + | ||
8 | +namespace stim{ | ||
9 | + | ||
10 | + | ||
/// A class designed to act as a 3D vector with CUDA compatibility
///
/// The three components are stored contiguously in a fixed-size array, in (x, y, z) order.
/// Every member tagged CUDA_CALLABLE carries whatever qualifiers that macro expands to.
/// @tparam T is the data type used to store each component
template<typename T>
class vec3{

protected:
	T ptr[3];								//the three components of the vector

public:

	/// Default constructor (the components are left uninitialized)
	CUDA_CALLABLE vec3(){}

	/// Construct a vector with every component set to the same value
	/// @param v is the value assigned to all three components
	CUDA_CALLABLE vec3(T v){
		ptr[0] = ptr[1] = ptr[2] = v;
	}

	/// Construct a vector from three individual components
	CUDA_CALLABLE vec3(T x, T y, T z){
		ptr[0] = x;
		ptr[1] = y;
		ptr[2] = z;
	}

	/// Copy constructor
	CUDA_CALLABLE vec3( const vec3<T>& other){
		ptr[0] = other.ptr[0];
		ptr[1] = other.ptr[1];
		ptr[2] = other.ptr[2];
	}

	/// Access an element using an index (no bounds checking is performed)
	CUDA_CALLABLE T& operator[](size_t idx){
		return ptr[idx];
	}

	/// Read-only access to an element, so that const vectors can be indexed as well
	CUDA_CALLABLE const T& operator[](size_t idx) const{
		return ptr[idx];
	}

	/// Returns a pointer to the first of the three contiguous components
	CUDA_CALLABLE T* data(){
		return ptr;
	}

	/// Returns a read-only pointer to the first of the three contiguous components
	CUDA_CALLABLE const T* data() const{
		return ptr;
	}

	/// Casting operator. Creates a new vector with a new type U.
	template< typename U >
	CUDA_CALLABLE operator vec3<U>() const{
		//vec3<U> is a different class than vec3<T>, so its protected array cannot be touched here:
		//	build the result through the public constructor instead
		return vec3<U>((U)ptr[0], (U)ptr[1], (U)ptr[2]);
	}

	/// Computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
	CUDA_CALLABLE T len_sq() const{
		return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
	}

	/// Computes the Euclidean length of the vector
	CUDA_CALLABLE T len() const{
		return (T)std::sqrt(len_sq());
	}


	/// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi)
	/// theta is the azimuth atan2(y, x), so it lies in [-pi, pi]; phi is the polar angle acos(z / r).
	/// A zero-length vector is given phi = 0 to avoid dividing by zero.
	CUDA_CALLABLE vec3<T> cart2sph() const{
		vec3<T> sph;
		sph.ptr[0] = len();
		sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
		if(sph.ptr[0] == 0)
			sph.ptr[2] = 0;
		else
			sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
		return sph;
	}

	/// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z)
	/// This is the inverse of cart2sph(): theta is the azimuth and phi is the polar angle.
	CUDA_CALLABLE vec3<T> sph2cart() const{
		vec3<T> cart;
		cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

		return cart;
	}

	/// Computes the normalized vector (where each coordinate is divided by the L2 norm)
	/// No check is made for a zero-length vector.
	CUDA_CALLABLE vec3<T> norm() const{
		return (*this) / len();
	}

	/// Computes the cross product of this vector (left operand) and rhs (right operand)
	CUDA_CALLABLE vec3<T> cross(const vec3<T>& rhs) const{
		return vec3<T>(	ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1],
						ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2],
						ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
	}

	/// Compute the Euclidean inner (dot) product
	CUDA_CALLABLE T dot(const vec3<T>& rhs) const{
		return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
	}

	/// Arithmetic addition operator

	/// @param rhs is the right-hand-side operand for the addition
	CUDA_CALLABLE vec3<T> operator+(const vec3<T>& rhs) const{
		return vec3<T>(ptr[0] + rhs.ptr[0], ptr[1] + rhs.ptr[1], ptr[2] + rhs.ptr[2]);
	}

	/// Arithmetic addition of a scalar to every component

	/// @param rhs is the scalar added to each component
	CUDA_CALLABLE vec3<T> operator+(T rhs) const{
		return vec3<T>(ptr[0] + rhs, ptr[1] + rhs, ptr[2] + rhs);
	}

	/// Arithmetic subtraction operator

	/// @param rhs is the right-hand-side operand for the subtraction
	CUDA_CALLABLE vec3<T> operator-(const vec3<T>& rhs) const{
		return vec3<T>(ptr[0] - rhs.ptr[0], ptr[1] - rhs.ptr[1], ptr[2] - rhs.ptr[2]);
	}

	/// Arithmetic subtraction of a scalar from every component

	/// @param rhs is the scalar subtracted from each component
	CUDA_CALLABLE vec3<T> operator-(T rhs) const{
		return vec3<T>(ptr[0] - rhs, ptr[1] - rhs, ptr[2] - rhs);
	}

	/// Arithmetic scalar multiplication operator

	/// @param rhs is the scalar that each component is multiplied by
	CUDA_CALLABLE vec3<T> operator*(T rhs) const{
		return vec3<T>(ptr[0] * rhs, ptr[1] * rhs, ptr[2] * rhs);
	}

	/// Arithmetic scalar division operator

	/// @param rhs is the scalar that each component is divided by
	CUDA_CALLABLE vec3<T> operator/(T rhs) const{
		//divide each component directly: multiplying by (T)1/rhs collapses to zero for integer types
		return vec3<T>(ptr[0] / rhs, ptr[1] / rhs, ptr[2] / rhs);
	}

	/// Multiplication by a scalar, followed by assignment
	CUDA_CALLABLE vec3<T>& operator*=(T rhs){
		ptr[0] *= rhs;
		ptr[1] *= rhs;
		ptr[2] *= rhs;
		return *this;
	}

	/// Component-wise addition of another vector, followed by assignment
	CUDA_CALLABLE vec3<T>& operator+=(const vec3<T>& rhs){
		ptr[0] += rhs.ptr[0];
		ptr[1] += rhs.ptr[1];
		ptr[2] += rhs.ptr[2];
		return *this;
	}

	/// Assign a scalar to all values
	CUDA_CALLABLE vec3<T> & operator=(T rhs){
		ptr[0] = ptr[1] = ptr[2] = rhs;
		return *this;
	}

	/// Casting and assignment (each component of rhs is cast to T)
	template<typename Y>
	CUDA_CALLABLE vec3<T> & operator=(const vec3<Y>& rhs){
		//rhs is a different class, so read it through its public element access
		ptr[0] = (T)rhs[0];
		ptr[1] = (T)rhs[1];
		ptr[2] = (T)rhs[2];
		return *this;
	}

	/// Unary minus (returns the negative of the vector)
	CUDA_CALLABLE vec3<T> operator-() const{
		return vec3<T>(-ptr[0], -ptr[1], -ptr[2]);
	}

//#ifndef __NVCC__
	/// Outputs the vector as a string of the form "[x, y, z]"
	std::string str() const{
		std::stringstream ss;

		const size_t N = 3;

		ss<<"[";
		for(size_t i=0; i<N; i++)
		{
			ss<<ptr[i];
			if(i != N-1)
				ss<<", ";
		}
		ss<<"]";

		return ss.str();
	}
//#endif

	/// Returns the number of components, which is always 3
	CUDA_CALLABLE size_t size() const{ return 3; }

	};									//end class vec3
242 | +} //end namespace stim | ||
243 | + | ||
244 | +/// Multiply a vector by a constant when the vector is on the right hand side | ||
245 | +template <typename T> | ||
246 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | ||
247 | + return rhs * lhs; | ||
248 | +} | ||
249 | + | ||
250 | +//stream operator | ||
251 | +template<typename T> | ||
252 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | ||
253 | + os<<rhs.str(); | ||
254 | + return os; | ||
255 | +} | ||
256 | + | ||
257 | +#endif |
1 | +#ifndef STIM_VEC3_H | ||
2 | +#define STIM_VEC3_H | ||
3 | + | ||
4 | + | ||
#include <cmath>
#include <cstddef>
#include <ostream>
#include <sstream>
#include <string>

#include <stim/cuda/cudatools/callable.h>
7 | + | ||
8 | + | ||
9 | +namespace stim{ | ||
10 | + | ||
11 | + | ||
/// A class designed to act as a 3D vector with CUDA compatibility
///
/// The three components are stored contiguously in a fixed-size array, in (x, y, z) order.
/// Every member tagged CUDA_CALLABLE carries whatever qualifiers that macro expands to.
/// @tparam T is the data type used to store each component
template<typename T>
class vec3{

protected:
	T ptr[3];								//the three components of the vector

public:

	/// Default constructor (the components are left uninitialized)
	CUDA_CALLABLE vec3(){}

	/// Construct a vector with every component set to the same value
	/// @param v is the value assigned to all three components
	CUDA_CALLABLE vec3(T v){
		ptr[0] = ptr[1] = ptr[2] = v;
	}

	/// Construct a vector from three individual components
	CUDA_CALLABLE vec3(T x, T y, T z){
		ptr[0] = x;
		ptr[1] = y;
		ptr[2] = z;
	}

	/// Copy constructor
	CUDA_CALLABLE vec3( const vec3<T>& other){
		ptr[0] = other.ptr[0];
		ptr[1] = other.ptr[1];
		ptr[2] = other.ptr[2];
	}

	/// Access an element using an index (no bounds checking is performed)
	CUDA_CALLABLE T& operator[](size_t idx){
		return ptr[idx];
	}

	/// Read-only access to an element, so that const vectors can be indexed as well
	CUDA_CALLABLE const T& operator[](size_t idx) const{
		return ptr[idx];
	}

	/// Returns a pointer to the first of the three contiguous components
	CUDA_CALLABLE T* data(){
		return ptr;
	}

	/// Returns a read-only pointer to the first of the three contiguous components
	CUDA_CALLABLE const T* data() const{
		return ptr;
	}

	/// Casting operator. Creates a new vector with a new type U.
	template< typename U >
	CUDA_CALLABLE operator vec3<U>() const{
		//vec3<U> is a different class than vec3<T>, so its protected array cannot be touched here:
		//	build the result through the public constructor instead
		return vec3<U>((U)ptr[0], (U)ptr[1], (U)ptr[2]);
	}

	/// Computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
	CUDA_CALLABLE T len_sq() const{
		return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
	}

	/// Computes the Euclidean length of the vector
	CUDA_CALLABLE T len() const{
		return (T)std::sqrt(len_sq());
	}


	/// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi)
	/// theta is the azimuth atan2(y, x), so it lies in [-pi, pi]; phi is the polar angle acos(z / r).
	/// A zero-length vector is given phi = 0 to avoid dividing by zero.
	CUDA_CALLABLE vec3<T> cart2sph() const{
		vec3<T> sph;
		sph.ptr[0] = len();
		sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
		if(sph.ptr[0] == 0)
			sph.ptr[2] = 0;
		else
			sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
		return sph;
	}

	/// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z)
	/// This is the inverse of cart2sph(): theta is the azimuth and phi is the polar angle.
	CUDA_CALLABLE vec3<T> sph2cart() const{
		vec3<T> cart;
		cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
		cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

		return cart;
	}

	/// Computes the normalized vector (where each coordinate is divided by the L2 norm)
	/// No check is made for a zero-length vector.
	CUDA_CALLABLE vec3<T> norm() const{
		return (*this) / len();
	}

	/// Computes the cross product of this vector (left operand) and rhs (right operand)
	CUDA_CALLABLE vec3<T> cross(const vec3<T>& rhs) const{
		return vec3<T>(	ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1],
						ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2],
						ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);
	}

	/// Compute the Euclidean inner (dot) product
	CUDA_CALLABLE T dot(const vec3<T>& rhs) const{
		return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
	}

	/// Arithmetic addition operator

	/// @param rhs is the right-hand-side operand for the addition
	CUDA_CALLABLE vec3<T> operator+(const vec3<T>& rhs) const{
		return vec3<T>(ptr[0] + rhs.ptr[0], ptr[1] + rhs.ptr[1], ptr[2] + rhs.ptr[2]);
	}

	/// Arithmetic addition of a scalar to every component

	/// @param rhs is the scalar added to each component
	CUDA_CALLABLE vec3<T> operator+(T rhs) const{
		return vec3<T>(ptr[0] + rhs, ptr[1] + rhs, ptr[2] + rhs);
	}

	/// Arithmetic subtraction operator

	/// @param rhs is the right-hand-side operand for the subtraction
	CUDA_CALLABLE vec3<T> operator-(const vec3<T>& rhs) const{
		return vec3<T>(ptr[0] - rhs.ptr[0], ptr[1] - rhs.ptr[1], ptr[2] - rhs.ptr[2]);
	}

	/// Arithmetic subtraction of a scalar from every component

	/// @param rhs is the scalar subtracted from each component
	CUDA_CALLABLE vec3<T> operator-(T rhs) const{
		return vec3<T>(ptr[0] - rhs, ptr[1] - rhs, ptr[2] - rhs);
	}

	/// Arithmetic scalar multiplication operator

	/// @param rhs is the scalar that each component is multiplied by
	CUDA_CALLABLE vec3<T> operator*(T rhs) const{
		return vec3<T>(ptr[0] * rhs, ptr[1] * rhs, ptr[2] * rhs);
	}

	/// Arithmetic scalar division operator

	/// @param rhs is the scalar that each component is divided by
	CUDA_CALLABLE vec3<T> operator/(T rhs) const{
		//divide each component directly: multiplying by (T)1/rhs collapses to zero for integer types
		return vec3<T>(ptr[0] / rhs, ptr[1] / rhs, ptr[2] / rhs);
	}

	/// Multiplication by a scalar, followed by assignment
	CUDA_CALLABLE vec3<T>& operator*=(T rhs){
		ptr[0] *= rhs;
		ptr[1] *= rhs;
		ptr[2] *= rhs;
		return *this;
	}

	/// Component-wise addition of another vector, followed by assignment
	CUDA_CALLABLE vec3<T>& operator+=(const vec3<T>& rhs){
		ptr[0] += rhs.ptr[0];
		ptr[1] += rhs.ptr[1];
		ptr[2] += rhs.ptr[2];
		return *this;
	}

	/// Assign a scalar to all values
	CUDA_CALLABLE vec3<T> & operator=(T rhs){
		ptr[0] = ptr[1] = ptr[2] = rhs;
		return *this;
	}

	/// Casting and assignment (each component of rhs is cast to T)
	template<typename Y>
	CUDA_CALLABLE vec3<T> & operator=(const vec3<Y>& rhs){
		//rhs is a different class, so read it through its public element access
		ptr[0] = (T)rhs[0];
		ptr[1] = (T)rhs[1];
		ptr[2] = (T)rhs[2];
		return *this;
	}

	/// Unary minus (returns the negative of the vector)
	CUDA_CALLABLE vec3<T> operator-() const{
		return vec3<T>(-ptr[0], -ptr[1], -ptr[2]);
	}

	/// Outputs the vector as a string of the form "[x, y, z]"
	std::string str() const{
		std::stringstream ss;

		const size_t N = 3;

		ss<<"[";
		for(size_t i=0; i<N; i++)
		{
			ss<<ptr[i];
			if(i != N-1)
				ss<<", ";
		}
		ss<<"]";

		return ss.str();
	}

	/// Returns the number of components, which is always 3
	CUDA_CALLABLE size_t size() const{ return 3; }

	};									//end class vec3
241 | +} //end namespace stim | ||
242 | + | ||
243 | +/// Multiply a vector by a constant when the vector is on the right hand side | ||
244 | +template <typename T> | ||
245 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | ||
246 | + return rhs * lhs; | ||
247 | +} | ||
248 | + | ||
249 | +//stream operator | ||
250 | +template<typename T> | ||
251 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | ||
252 | + os<<rhs.str(); | ||
253 | + return os; | ||
254 | +} | ||
255 | + | ||
256 | +#endif |
stim/math/vector.h
@@ -5,6 +5,7 @@ | @@ -5,6 +5,7 @@ | ||
5 | #include <cmath> | 5 | #include <cmath> |
6 | #include <sstream> | 6 | #include <sstream> |
7 | #include <vector> | 7 | #include <vector> |
8 | +#include <algorithm> | ||
8 | 9 | ||
9 | #include <stim/cuda/cudatools/callable.h> | 10 | #include <stim/cuda/cudatools/callable.h> |
10 | #include <stim/math/vec3.h> | 11 | #include <stim/math/vec3.h> |
@@ -74,11 +75,11 @@ struct vec : public std::vector<T> | @@ -74,11 +75,11 @@ struct vec : public std::vector<T> | ||
74 | at(i) = other[i]; | 75 | at(i) = other[i]; |
75 | } | 76 | } |
76 | } | 77 | } |
77 | - | 78 | + |
78 | // vec( vec3<T>& other){ | 79 | // vec( vec3<T>& other){ |
79 | // resize(3); //resize the current vector to match the copy | 80 | // resize(3); //resize the current vector to match the copy |
80 | // for(size_t i=0; i<3; i++){ //copy each element | 81 | // for(size_t i=0; i<3; i++){ //copy each element |
81 | -// at(i) = other[i]; | 82 | +// at(i) = other[i]; |
82 | // } | 83 | // } |
83 | // } | 84 | // } |
84 | 85 | ||
@@ -139,16 +140,16 @@ struct vec : public std::vector<T> | @@ -139,16 +140,16 @@ struct vec : public std::vector<T> | ||
139 | 140 | ||
140 | } | 141 | } |
141 | 142 | ||
142 | - | ||
143 | - vec<T> cyl2cart() const | ||
144 | - { | ||
145 | - vec<T> cyl; | ||
146 | - cyl.push_back(at(0)*std::sin(at(1))); | ||
147 | - cyl.push_back(at(0)*std::cos(at(1))); | ||
148 | - cyl.push_back(at(2)); | ||
149 | - return(cyl); | ||
150 | - | ||
151 | - } | 143 | + |
144 | + vec<T> cyl2cart() const | ||
145 | + { | ||
146 | + vec<T> cyl; | ||
147 | + cyl.push_back(at(0)*std::sin(at(1))); | ||
148 | + cyl.push_back(at(0)*std::cos(at(1))); | ||
149 | + cyl.push_back(at(2)); | ||
150 | + return(cyl); | ||
151 | + | ||
152 | + } | ||
152 | /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | 153 | /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) |
153 | vec<T> cart2sph() const | 154 | vec<T> cart2sph() const |
154 | { | 155 | { |
@@ -335,16 +336,16 @@ struct vec : public std::vector<T> | @@ -335,16 +336,16 @@ struct vec : public std::vector<T> | ||
335 | return *this; | 336 | return *this; |
336 | } | 337 | } |
337 | 338 | ||
338 | - /// Cast to a vec3 | ||
339 | - operator stim::vec3<T>(){ | ||
340 | - stim::vec3<T> r; | ||
341 | - size_t N = std::min<size_t>(size(), 3); | ||
342 | - for(size_t i = 0; i < N; i++) | ||
343 | - r[i] = at(i); | ||
344 | - return r; | ||
345 | - } | ||
346 | - | ||
347 | - | 339 | + /// Cast to a vec3 |
340 | + operator stim::vec3<T>(){ | ||
341 | + stim::vec3<T> r; | ||
342 | + size_t N = std::min(size(), (size_t)3); | ||
343 | + for(size_t i = 0; i < N; i++) | ||
344 | + r[i] = at(i); | ||
345 | + return r; | ||
346 | + } | ||
347 | + | ||
348 | + | ||
348 | /// Casting and assignment | 349 | /// Casting and assignment |
349 | template<typename Y> | 350 | template<typename Y> |
350 | vec<T> & operator=(vec<Y> rhs){ | 351 | vec<T> & operator=(vec<Y> rhs){ |
@@ -355,16 +356,16 @@ struct vec : public std::vector<T> | @@ -355,16 +356,16 @@ struct vec : public std::vector<T> | ||
355 | at(i) = rhs[i]; | 356 | at(i) = rhs[i]; |
356 | return *this; | 357 | return *this; |
357 | } | 358 | } |
358 | - | ||
359 | - /// Assign a vec = vec3 | ||
360 | - template<typename Y> | ||
361 | - vec<T> & operator=(vec3<Y> rhs) | ||
362 | - { | ||
363 | - resize(3); | ||
364 | - for(size_t i=0; i<3; i++) | ||
365 | - at(i) = rhs[i]; | ||
366 | - return *this; | ||
367 | - } | 359 | + |
360 | + /// Assign a vec = vec3 | ||
361 | + template<typename Y> | ||
362 | + vec<T> & operator=(vec3<Y> rhs) | ||
363 | + { | ||
364 | + resize(3); | ||
365 | + for(size_t i=0; i<3; i++) | ||
366 | + at(i) = rhs[i]; | ||
367 | + return *this; | ||
368 | + } | ||
368 | 369 | ||
369 | /// Unary minus (returns the negative of the vector) | 370 | /// Unary minus (returns the negative of the vector) |
370 | vec<T> operator-() const{ | 371 | vec<T> operator-() const{ |
stim/parser/arguments.h
@@ -13,6 +13,44 @@ | @@ -13,6 +13,44 @@ | ||
13 | #include <Windows.h> | 13 | #include <Windows.h> |
14 | #endif | 14 | #endif |
15 | 15 | ||
16 | +/**The arglist class implements command line arguments. | ||
17 | + Example: | ||
18 | + | ||
19 | + 1) Create an arglist instance: | ||
20 | + | ||
21 | + stim::arglist args; | ||
22 | + | ||
23 | + 2) Add arguments: | ||
24 | + | ||
25 | + args.add("help", "prints this help"); | ||
26 | + args.add("foo", "foo takes a single integer value", "", "[intval]"); | ||
27 | + args.add("bar", "bar takes two floating point values", "", "[value1], [value2]"); | ||
28 | + | ||
29 | + 3) Parse the command line: | ||
30 | + | ||
31 | + args.parse(argc, argv); | ||
32 | + | ||
33 | + 4) You generally want to immediately test for help and output available arguments: | ||
34 | + | ||
35 | + if(args["help"].is_set()) | ||
36 | + std::cout<<args.str(); | ||
37 | + | ||
38 | + | ||
39 | + | ||
40 | + 5) Retrieve values: | ||
41 | + | ||
42 | + int foo; | ||
43 | + float bar1, bar2; | ||
44 | + if(args["foo"]) | ||
45 | + foo = args["foo"].as_int(); | ||
46 | + if(args["bar"]){ | ||
47 | + bar1 = args["bar"].as_float(0); | ||
48 | + bar2 = args["bar"].as_float(1); | ||
49 | + } | ||
50 | + | ||
51 | + | ||
52 | +**/ | ||
53 | + | ||
16 | namespace stim{ | 54 | namespace stim{ |
17 | 55 | ||
18 | class cmd_option | 56 | class cmd_option |
@@ -258,10 +296,12 @@ namespace stim{ | @@ -258,10 +296,12 @@ namespace stim{ | ||
258 | flag = true; | 296 | flag = true; |
259 | } | 297 | } |
260 | 298 | ||
261 | - bool is_set() | ||
262 | - { | 299 | + bool is_set() const{ |
263 | return flag; | 300 | return flag; |
264 | } | 301 | } |
302 | + operator bool() const{ | ||
303 | + return is_set(); | ||
304 | + } | ||
265 | 305 | ||
266 | }; | 306 | }; |
267 | 307 | ||
@@ -271,43 +311,7 @@ namespace stim{ | @@ -271,43 +311,7 @@ namespace stim{ | ||
271 | size_t index; | 311 | size_t index; |
272 | }; | 312 | }; |
273 | 313 | ||
274 | - /**The arglist class implements command line arguments. | ||
275 | - Example: | ||
276 | - | ||
277 | - 1) Create an arglist instance: | ||
278 | - | ||
279 | - stim::arglist args; | ||
280 | - | ||
281 | - 2) Add arguments: | ||
282 | 314 | ||
283 | - args.add("help", "prints this help"); | ||
284 | - args.add("foo", "foo takes a single integer value", "", "[intval]"); | ||
285 | - args.add("bar", "bar takes two floating point values", "", "[value1], [value2]"); | ||
286 | - | ||
287 | - 3) Parse the command line: | ||
288 | - | ||
289 | - args.parse(argc, argv); | ||
290 | - | ||
291 | - 4) You generally want to immediately test for help and output available arguments: | ||
292 | - | ||
293 | - if(args["help"].is_set()) | ||
294 | - std::cout<<args.str(); | ||
295 | - | ||
296 | - | ||
297 | - | ||
298 | - 5) Retrieve values: | ||
299 | - | ||
300 | - int foo; | ||
301 | - float bar1, bar2; | ||
302 | - if(args["foo"]) | ||
303 | - foo = args["foo"].as_int(); | ||
304 | - if(args["bar"]){ | ||
305 | - bar1 = args["bar"].as_float(0); | ||
306 | - bar2 = args["bar"].as_float(1); | ||
307 | - } | ||
308 | - | ||
309 | - | ||
310 | - **/ | ||
311 | 315 | ||
312 | class arglist | 316 | class arglist |
313 | { | 317 | { |
@@ -528,21 +532,21 @@ namespace stim{ | @@ -528,21 +532,21 @@ namespace stim{ | ||
528 | std::vector<std::string> arg_vector(){ | 532 | std::vector<std::string> arg_vector(){ |
529 | return args; | 533 | return args; |
530 | } | 534 | } |
531 | - ///Returns an object describing the argument | ||
532 | - | ||
533 | - /// @param _name is the name of the requested argument | ||
534 | - cmd_option operator[](std::string _name){ | ||
535 | - std::vector<cmd_option>::iterator it; | ||
536 | - it = find(opts.begin(), opts.end(), _name);// - opts.begin(); | 535 | + ///Returns an object describing the argument |
537 | 536 | ||
538 | - if(it == opts.end()){ | ||
539 | - std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | ||
540 | - exit(1); | ||
541 | - } | 537 | + /// @param _name is the name of the requested argument |
538 | + cmd_option operator[](std::string _name){ | ||
539 | + std::vector<cmd_option>::iterator it; | ||
540 | + it = find(opts.begin(), opts.end(), _name);// - opts.begin(); | ||
542 | 541 | ||
543 | - return *it; | 542 | + if(it == opts.end()){ |
543 | + std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | ||
544 | + exit(1); | ||
544 | } | 545 | } |
545 | 546 | ||
547 | + return *it; | ||
548 | + } | ||
549 | + | ||
546 | 550 | ||
547 | }; | 551 | }; |
548 | 552 |
1 | +/// Reconstruct a 1D function from a 2D symmetric function. This function takes a 2D image f(x,y) as input and | ||
2 | +/// builds a 1D function f(r) where r = sqrt(x^2 + y^2) to approximate this 2D function. | ||
3 | +/// This is useful for several applications, such as: | ||
4 | +/// 1) Calculating a 1D function from a noisy 2D image, when you know the 2D image is supposed to be symmetric | ||
5 | +/// 2) Calculating the average value for every r = sqrt(x^2 + y^2) | ||
6 | + | ||
/// Given a set of function samples equally spaced by dx, calculate the lower of the two samples closest to x
/// and the proximity ratio alpha.
/// This can be used to linearly interpolate between an array of equally spaced values. Given the query value x, the
/// interpolated value can be calculated as r = values[sample] * alpha + values[sample + 1] * (1 - alpha)
/// @param sample receives the index of the closest sample at or below x (stored as a value of type T)
/// @param alpha receives the weight of that sample: 1 when x lies exactly on it, decreasing towards 0 as x approaches sample + 1
/// @param dx is the spacing between values
/// @param x is the query point
template<typename T>
void lerp_alpha(T& sample, T& alpha, T dx, T x){
	sample = std::floor(x / dx);
	alpha = 1 - (x - (sample * dx)) / dx;
}

/// Reconstruct a 1D radial function f(r) from a square, symmetric 2D function f(x, y).
/// This function assumes that the input image is square, that the # of samples are odd, and that r=0 is at the center.
/// The image is first folded onto one octant (averaging the mirrored pixels), and every folded pixel is then
/// distributed between the two samples of fr nearest to its radius using linear-interpolation weights.
/// Radii are measured in pixels from the center pixel, so the largest radius is the distance from the center to a corner.
/// @param fr is an array of X elements that will store the reconstructed function (samples that receive no weight are left at zero)
/// @param dr receives the spacing (in pixels) between samples in fr
/// @param fxy is the X * X input image, indexed as fxy[y * X + x]
/// @param X is the width and height of the input image (must be odd); an error is printed and the program exits otherwise
template<typename T>
void cpu_func1_from_symmetric2(T* fr, T& dr, T* fxy, size_t X){

	if(X%2 == 0){							//the 2D function must be odd (a sample must be available for r=0)
		std::cerr<<"Error, X = "<<X<<" must be odd."<<std::endl;
		exit(1);
	}

	if(X == 1){								//a single pixel only provides f(0), and there is no spacing to report
		fr[0] = fxy[0];
		dr = 0;
		return;
	}

	size_t center = X / 2;					//index of the center pixel (r = 0) along both axes
	size_t C = center + 1;					//width of the folded function (distances 0 ... center from the center pixel)
	size_t N = C * C;						//number of values in the folded function

	// The first step is to fold the function 8 times to take advantage of symmetry in the grid
	T* folded = (T*) malloc(sizeof(T) * N );						//allocate space for the folded function
	unsigned char* count = (unsigned char*) malloc( N );			//counter for the folded function (at most 8 pixels fold onto one entry)
	T* fA = (T*) malloc( sizeof(T) * X);							//allocate space for a counter function storing alpha weights
	if(folded == NULL || count == NULL || fA == NULL){
		std::cerr<<"Error, unable to allocate memory for the folded function."<<std::endl;
		free(folded);
		free(count);
		free(fA);
		exit(1);
	}
	memset(folded, 0, sizeof(T) * N);
	memset(count, 0, N);					//the counter holds N bytes (not N values of type T)

	size_t xi, yi;							//indices into the image f(xi, yi)
	size_t xii, yii;						//indices into the folded image
	for(xi = 0; xi < X; xi++){
		for(yi = 0; yi < X; yi++){
			//fold the function along the x and y axes: the folded index is the distance from the center pixel
			xii = (xi > center) ? xi - center : center - xi;
			yii = (yi > center) ? yi - center : center - yi;

			if(xii < yii) std::swap(xii, yii);			//fold the function again along the 45-degree line

			folded[yii * C + xii] += fxy[yi * X + xi];	//add the value f(x, y) to the folded function
			count[yii * C + xii] += 1;					//add a counter to the counter table
		}
	}

	//divide out the counter to correct the folded function
	for(size_t i = 0; i < N; i++){
		if(count[i] != 0)					//entries on the unused side of the 45-degree line never receive a value
			folded[i] /= (T)count[i];
	}

	T max_r = std::sqrt((T)(2 * center * center));	//maximum r value, which lies along the diagonal from the center to a corner
	dr = max_r / (T)(X - 1);						//spacing between samples in the output function f(r)

	memset(fA, 0, sizeof(T) * X);			//zero out the alpha array
	memset(fr, 0, sizeof(T) * X);			//zero out the output function

	T r;									//register to store the value of r at each point
	T fsample;								//lowest nearby sample index, as returned by lerp_alpha
	T alpha;
	size_t sample;
	for(xi = 0; xi < C; xi++){
		for(yi = 0; yi <= xi; yi++){					//visit the folded octant, including the diagonal and the center
			r = std::sqrt((T)(xi*xi + yi*yi));			//calculate the value of r for the current (x, y)
			lerp_alpha(fsample, alpha, dr, r);			//calculate the lowest nearby sample index and the associated alpha weight
			sample = (size_t)fsample;
			if(sample > X - 1){							//guard against round-off pushing the index past the last bin
				sample = X - 1;
				alpha = 1;
			}
			fr[sample] += folded[yi * C + xi] * alpha;	//sum the weighted value from the folded function
			fA[sample] += alpha;						//sum the weight

			if(sample < X - 1){							//if we aren't dealing with the last bin
				fr[sample + 1] += folded[yi * C + xi] * (1 - alpha);	//calculate the weighted value for the second point
				fA[sample + 1] += 1 - alpha;							//add the second alpha value
			}
		}
	}

	//divide out the alpha values
	for(size_t i = 0; i < X; i++){
		if(fA[i] != 0)						//bins that received no weight stay at zero instead of becoming 0/0
			fr[i] /= fA[i];
	}

	//free allocated memory
	free(folded);
	free(count);
	free(fA);
}
0 | \ No newline at end of file | 97 | \ No newline at end of file |
1 | +// the size of the CUDA stack is presumably the stack_size macro defined below (currently 50); increase it if you need to build a deeper tree | |
2 | +// data should be stored in row-major | ||
3 | +// x1,x2,x3,x4,x5...... | ||
4 | +// y1,y2,y3,y4,y5...... | ||
5 | +// .................... | ||
6 | +// .................... | ||
7 | + | ||
8 | +#ifndef KDTREE_H | ||
9 | +#define KDTREE_H | ||
10 | +#define stack_size 50 | ||
11 | + | ||
12 | +#include "device_launch_parameters.h" | ||
13 | +#include <cuda.h> | ||
14 | +#include <cuda_runtime_api.h> | ||
15 | +#include "cuda_runtime.h" | ||
16 | +#include <vector> | ||
17 | +#include <cstring> | ||
18 | +#include <float.h> | ||
19 | +#include <iostream> | ||
20 | +#include <algorithm> | ||
21 | +#include <stim/cuda/cudatools/error.h> | ||
22 | +#include <stim/visualization/aabbn.h> | ||
23 | + | ||
24 | +namespace stim { | ||
25 | + namespace kdtree { | ||
26 | + template<typename T, int D> // typename refers to float or double while D refers to dimension of points | ||
27 | + struct point { | ||
28 | + T dim[D]; // create a structure to store every one input point | ||
29 | + }; | ||
30 | + | ||
31 | + template<typename T> | ||
32 | + class kdnode { | ||
33 | + public: | ||
34 | + kdnode() { // constructor for initializing a kdnode | ||
35 | + parent = NULL; // set every node's parent, left and right kdnode pointers to NULL | ||
36 | + left = NULL; | ||
37 | + right = NULL; | ||
38 | + parent_idx = -1; // set parent node index to default -1 | ||
39 | + left_idx = -1; | ||
40 | + right_idx = -1; | ||
41 | + split_value = -1; // set split_value to default -1 | ||
42 | + } | ||
43 | + int idx; // index of current node | ||
44 | + int parent_idx, left_idx, right_idx; // index of parent, left and right nodes | ||
45 | + kdnode *parent, *left, *right; // parent, left and right kdnodes | ||
46 | + T split_value; // splitting value of current node | ||
47 | + std::vector <size_t> indices; // it indicates the points' indices that current node has | ||
48 | + size_t level; // tree level of current node | ||
49 | + }; | ||
50 | + } // end of namespace kdtree | ||
51 | + | ||
52 | + template <typename T, int D = 3> // set dimension of data to default 3 | ||
53 | + class cpu_kdtree { | ||
54 | + protected: | ||
55 | + int current_axis; // current judging axis | ||
56 | + int n_id; // store the total number of nodes | ||
57 | + std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points | ||
58 | + std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching | ||
59 | + kdtree::kdnode<T> *root; // root node | ||
60 | + static cpu_kdtree<T, D> *cur_tree_ptr; | ||
61 | + public: | ||
62 | + cpu_kdtree() { // constructor for creating a cpu_kdtree | ||
63 | + cur_tree_ptr = this; // create a class pointer points to the current class value | ||
64 | + n_id = 0; // set total number of points to default 0 | ||
65 | + } | ||
66 | + ~cpu_kdtree() { // destructor of cpu_kdtree | ||
67 | + std::vector <kdtree::kdnode<T>*> next_nodes; | ||
68 | + next_nodes.push_back(root); | ||
69 | + while (next_nodes.size()) { | ||
70 | + std::vector <kdtree::kdnode<T>*> next_search_nodes; | ||
71 | + while (next_nodes.size()) { | ||
72 | + kdtree::kdnode<T> *cur = next_nodes.back(); | ||
73 | + next_nodes.pop_back(); | ||
74 | + if (cur->left) | ||
75 | + next_search_nodes.push_back(cur->left); | ||
76 | + if (cur->right) | ||
77 | + next_search_nodes.push_back(cur->right); | ||
78 | + delete cur; | ||
79 | + } | ||
80 | + next_nodes = next_search_nodes; | ||
81 | + } | ||
82 | + root = NULL; | ||
83 | + } | ||
84 | + void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) { | ||
85 | + tmp_points = &reference_points; | ||
86 | + root = new kdtree::kdnode<T>(); // initializing the root node | ||
87 | + root->idx = n_id++; // the index of root is 0 | ||
88 | + root->level = 0; // tree level begins at 0 | ||
89 | + root->indices.resize(reference_points.size()); // get the number of points | ||
90 | + for (size_t i = 0; i < reference_points.size(); i++) { | ||
91 | + root->indices[i] = i; // set indices of input points | ||
92 | + } | ||
93 | + std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes | ||
94 | + next_nodes.push_back(root); // push back the root node | ||
95 | + while (next_nodes.size()) { | ||
96 | + std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes | ||
97 | + while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search | ||
98 | + kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first) | ||
99 | + next_nodes.pop_back(); // pop out current node in order to store next round of nodes | ||
100 | + if (current_node->level < max_levels) { | ||
101 | + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point | ||
102 | + kdtree::kdnode<T> *left = new kdtree::kdnode<T>(); | ||
103 | + kdtree::kdnode<T> *right = new kdtree::kdnode<T>(); | ||
104 | + left->idx = n_id++; // set the index of current node's left node | ||
105 | + right->idx = n_id++; | ||
106 | + split(current_node, left, right); // split left and right and determine a node | ||
107 | + std::vector <size_t> temp; // empty vecters of int | ||
108 | + //temp.resize(current_node->indices.size()); | ||
109 | + current_node->indices.swap(temp); // clean up current node's indices | ||
110 | + current_node->left = left; | ||
111 | + current_node->right = right; | ||
112 | + current_node->left_idx = left->idx; | ||
113 | + current_node->right_idx = right->idx; | ||
114 | + if (right->indices.size()) | ||
115 | + next_search_nodes.push_back(right); // left pop out first | ||
116 | + if (left->indices.size()) | ||
117 | + next_search_nodes.push_back(left); | ||
118 | + } | ||
119 | + } | ||
120 | + } | ||
121 | + next_nodes = next_search_nodes; // go deeper within the tree | ||
122 | + } | ||
123 | + } | ||
124 | + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort | ||
125 | + std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer | ||
126 | + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis]; | ||
127 | + } | ||
128 | + void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) { | ||
129 | + std::vector < typename kdtree::point<T, D> > &pts = *tmp_points; | ||
130 | + current_axis = cur->level % D; // indicate the judicative dimension or axis | ||
131 | + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data | ||
132 | + size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor | ||
133 | + cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node | ||
134 | + left->parent = cur; // set the parent of the next search nodes to current node | ||
135 | + right->parent = cur; | ||
136 | + left->level = cur->level + 1; // level + 1 | ||
137 | + right->level = cur->level + 1; | ||
138 | + left->parent_idx = cur->idx; // set its parent node's index | ||
139 | + right->parent_idx = cur->idx; | ||
140 | + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one | ||
141 | + size_t idx = cur->indices[i]; | ||
142 | + if (pts[idx].dim[current_axis] < cur->split_value) | ||
143 | + left->indices.push_back(idx); | ||
144 | + else | ||
145 | + right->indices.push_back(idx); | ||
146 | + } | ||
147 | + } | ||
148 | + void create(T *h_reference_points, size_t reference_count, size_t max_levels) { | ||
149 | + std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way | ||
150 | + for (size_t j = 0; j < reference_count; j++) | ||
151 | + for (size_t i = 0; i < D; i++) | ||
152 | + reference_points[j].dim[i] = h_reference_points[j * D + i]; | ||
153 | + cpu_create(reference_points, max_levels); | ||
154 | + cpu_tmp_points = *tmp_points; | ||
155 | + } | ||
156 | + int get_num_nodes() const { // get the total number of nodes | ||
157 | + return n_id; | ||
158 | + } | ||
159 | + kdtree::kdnode<T>* get_root() const { // get the root node of tree | ||
160 | + return root; | ||
161 | + } | ||
162 | + T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) { | ||
163 | + T distance = 0; | ||
164 | + | ||
165 | + for (size_t i = 0; i < D; i++) { | ||
166 | + T d = a.dim[i] - b.dim[i]; | ||
167 | + distance += d*d; | ||
168 | + } | ||
169 | + return distance; | ||
170 | + } | ||
171 | + void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) { | ||
172 | + T best_distance = FLT_MAX; // initialize the best distance to max of floating point | ||
173 | + size_t best_index = 0; | ||
174 | + std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points; | ||
175 | + while (true) { | ||
176 | + size_t split_axis = cur->level % D; | ||
177 | + if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace | ||
178 | + *node = cur; // pointer points to a pointer | ||
179 | + for (size_t i = 0; i < cur->indices.size(); i++) { | ||
180 | + size_t idx = cur->indices[i]; | ||
181 | + T d = cpu_distance(query, pts[idx]); // compute distances | ||
182 | + /// if we want to compute k nearest neighbor, we can input the last resul | ||
183 | + /// (last_best_dist < dist < best_dist) to select the next point until reaching to k | ||
184 | + if (d < best_distance) { | ||
185 | + best_distance = d; | ||
186 | + best_index = idx; // record the nearest neighbor index | ||
187 | + } | ||
188 | + } | ||
189 | + break; // find the target point then break the loop | ||
190 | + } | ||
191 | + else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side | ||
192 | + cur = cur->left; | ||
193 | + } | ||
194 | + else { | ||
195 | + cur = cur->right; | ||
196 | + } | ||
197 | + } | ||
198 | + *index = best_index; | ||
199 | + *distance = best_distance; | ||
200 | + } | ||
201 | + void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) { | ||
202 | + T best_distance = FLT_MAX; // initialize the best distance to max of floating point | ||
203 | + size_t best_index = 0; | ||
204 | + std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points; | ||
205 | + std::vector < typename kdtree::kdnode<T>*> next_node; | ||
206 | + next_node.push_back(cur); | ||
207 | + while (next_node.size()) { | ||
208 | + std::vector<typename kdtree::kdnode<T>*> next_search; | ||
209 | + while (next_node.size()) { | ||
210 | + cur = next_node.back(); | ||
211 | + next_node.pop_back(); | ||
212 | + size_t split_axis = cur->level % D; | ||
213 | + if (cur->left == NULL) { | ||
214 | + for (size_t i = 0; i < cur->indices.size(); i++) { | ||
215 | + size_t idx = cur->indices[i]; | ||
216 | + T d = cpu_distance(query, pts[idx]); | ||
217 | + if (d < best_distance) { | ||
218 | + best_distance = d; | ||
219 | + best_index = idx; | ||
220 | + } | ||
221 | + } | ||
222 | + } | ||
223 | + else { | ||
224 | + T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension | ||
225 | + /// there are three possibilities: on either left or right, and on both left and right | ||
226 | + if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist | ||
227 | + if (d < 0) | ||
228 | + next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region | ||
229 | + else | ||
230 | + next_search.push_back(cur->right); | ||
231 | + } | ||
232 | + else { // it is possible that nereast neighbor will appear on both left and right | ||
233 | + next_search.push_back(cur->left); | ||
234 | + next_search.push_back(cur->right); | ||
235 | + } | ||
236 | + } | ||
237 | + } | ||
238 | + next_node = next_search; // pop out at least one time | ||
239 | + } | ||
240 | + *index = best_index; | ||
241 | + *distance = best_distance; | ||
242 | + } | ||
243 | + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) { | ||
244 | + /// first convert the input query point into specific type | ||
245 | + kdtree::point<T, D> query; | ||
246 | + for (size_t j = 0; j < query_count; j++) { | ||
247 | + for (size_t i = 0; i < D; i++) | ||
248 | + query.dim[i] = h_query_points[j * D + i]; | ||
249 | + /// find the nearest node, this will be the upper bound for the next time searching | ||
250 | + kdtree::kdnode<T> *best_node = NULL; | ||
251 | + T best_distance = FLT_MAX; | ||
252 | + size_t best_index = 0; | ||
253 | + T radius = 0; // radius for range | ||
254 | + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step | ||
255 | + radius = sqrt(best_distance); // It is possible that nearest will appear in another region | ||
256 | + /// find other possibilities | ||
257 | + kdtree::kdnode<T> *cur = best_node; | ||
258 | + while (cur->parent != NULL) { // every node that you pass will be possible to be the best node | ||
259 | + /// go up | ||
260 | + kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through | ||
261 | + size_t split_axis = (parent->level) % D; | ||
262 | + /// search other nodes | ||
263 | + size_t tmp_index; | ||
264 | + T tmp_distance = FLT_MAX; | ||
265 | + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) { | ||
266 | + /// search opposite node | ||
267 | + if (parent->left != cur) | ||
268 | + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node | ||
269 | + else | ||
270 | + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance); | ||
271 | + } | ||
272 | + if (tmp_distance < best_distance) { | ||
273 | + best_distance = tmp_distance; | ||
274 | + best_index = tmp_index; | ||
275 | + } | ||
276 | + cur = parent; | ||
277 | + } | ||
278 | + h_indices[j] = best_index; | ||
279 | + h_distances[j] = best_distance; | ||
280 | + } | ||
281 | + } | ||
282 | + }; //end class kdtree | ||
283 | + | ||
284 | + template <typename T, int D> | ||
285 | + cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class | ||
286 | + | ||
/// Flattened KD-tree node: related nodes are referenced by integer position
/// in a node array rather than by pointer.
template <typename T>
struct cuda_kdnode {
    int parent;         // index of the parent node
    int left;           // index of the left child
    int right;          // index of the right child
    T split_value;      // splitting value of this node
    size_t num_index;   // number of point indices this node holds
    int index;          // position of the node's first point index
    size_t level;       // tree level of this node
};
295 | + | ||
296 | + template <typename T, int D> | ||
297 | + __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) { | ||
298 | + T distance = 0; | ||
299 | + | ||
300 | + for (size_t i = 0; i < D; i++) { | ||
301 | + T d = a.dim[i] - b.dim[i]; | ||
302 | + distance += d*d; | ||
303 | + } | ||
304 | + return distance; | ||
305 | + } | ||
306 | + template <typename T, int D> | ||
307 | + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) { | ||
308 | + T best_distance = FLT_MAX; | ||
309 | + size_t best_index = 0; | ||
310 | + | ||
311 | + while (true) { // break until reach the bottom | ||
312 | + int split_axis = nodes[cur].level % D; | ||
313 | + if (nodes[cur].left == -1) { // check whether it has left node or not | ||
314 | + *d_node = cur; | ||
315 | + for (int i = 0; i < nodes[cur].num_index; i++) { | ||
316 | + size_t idx = indices[nodes[cur].index + i]; | ||
317 | + T dist = gpu_distance<T, D>(d_query_point, d_reference_points[idx]); | ||
318 | + if (dist < best_distance) { | ||
319 | + best_distance = dist; | ||
320 | + best_index = idx; | ||
321 | + } | ||
322 | + } | ||
323 | + break; | ||
324 | + } | ||
325 | + else if (d_query_point.dim[split_axis] < nodes[cur].split_value) { // jump into specific son node | ||
326 | + cur = nodes[cur].left; | ||
327 | + } | ||
328 | + else { | ||
329 | + cur = nodes[cur].right; | ||
330 | + } | ||
331 | + } | ||
332 | + *d_distance = best_distance; | ||
333 | + *d_index = best_index; | ||
334 | + } | ||
335 | + template <typename T, int D> | ||
336 | + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | ||
337 | + T best_distance = FLT_MAX; | ||
338 | + size_t best_index = 0; | ||
339 | + | ||
340 | + int next_nodes_pos = 0; // initialize pop out order index | ||
341 | + next_nodes[id * stack_size + next_nodes_pos] = cur; // find data that belongs to the very specific thread | ||
342 | + next_nodes_pos++; | ||
343 | + | ||
344 | + while (next_nodes_pos) { | ||
345 | + int next_search_nodes_pos = 0; // record push back order index | ||
346 | + while (next_nodes_pos) { | ||
347 | + cur = next_nodes[id * stack_size + next_nodes_pos - 1]; // pop out the last push in one and keep poping out | ||
348 | + next_nodes_pos--; | ||
349 | + int split_axis = nodes[cur].level % D; | ||
350 | + | ||
351 | + if (nodes[cur].left == -1) { | ||
352 | + for (int i = 0; i < nodes[cur].num_index; i++) { | ||
353 | + int idx = indices[nodes[cur].index + i]; // all indices are stored in one array, pick up from every node's beginning index | ||
354 | + T d = gpu_distance<T>(d_query_point, d_reference_points[idx]); | ||
355 | + if (d < best_distance) { | ||
356 | + best_distance = d; | ||
357 | + best_index = idx; | ||
358 | + } | ||
359 | + } | ||
360 | + } | ||
361 | + else { | ||
362 | + T d = d_query_point.dim[split_axis] - nodes[cur].split_value; | ||
363 | + | ||
364 | + if (fabs(d) > range) { | ||
365 | + if (d < 0) { | ||
366 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left; | ||
367 | + next_search_nodes_pos++; | ||
368 | + } | ||
369 | + else { | ||
370 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right; | ||
371 | + next_search_nodes_pos++; | ||
372 | + } | ||
373 | + } | ||
374 | + else { | ||
375 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right; | ||
376 | + next_search_nodes_pos++; | ||
377 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left; | ||
378 | + next_search_nodes_pos++; | ||
379 | + if (next_search_nodes_pos > stack_size) { | ||
380 | + printf("Thread conflict might be caused by thread %d, so please try smaller input max_tree_levels\n", id); | ||
381 | + (*Judge)++; | ||
382 | + } | ||
383 | + } | ||
384 | + } | ||
385 | + } | ||
386 | + for (int i = 0; i < next_search_nodes_pos; i++) | ||
387 | + next_nodes[id * stack_size + i] = next_search_nodes[id * stack_size + i]; | ||
388 | + next_nodes_pos = next_search_nodes_pos; | ||
389 | + } | ||
390 | + *d_distance = best_distance; | ||
391 | + *d_index = best_index; | ||
392 | + } | ||
393 | + template <typename T, int D> | ||
394 | + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | ||
395 | + int best_node = 0; | ||
396 | + T best_distance = FLT_MAX; | ||
397 | + size_t best_index = 0; | ||
398 | + T radius = 0; | ||
399 | + | ||
400 | + search_at_node<T, D>(nodes, indices, d_reference_points, 0, d_query_point, &best_index, &best_distance, &best_node); | ||
401 | + radius = sqrt(best_distance); // get range | ||
402 | + int cur = best_node; | ||
403 | + | ||
404 | + while (nodes[cur].parent != -1) { | ||
405 | + int parent = nodes[cur].parent; | ||
406 | + int split_axis = nodes[parent].level % D; | ||
407 | + | ||
408 | + T tmp_dist = FLT_MAX; | ||
409 | + size_t tmp_idx; | ||
410 | + if (fabs(nodes[parent].split_value - d_query_point.dim[split_axis]) <= radius) { | ||
411 | + if (nodes[parent].left != cur) | ||
412 | + search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].left, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge); | ||
413 | + else | ||
414 | + search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].right, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge); | ||
415 | + } | ||
416 | + if (tmp_dist < best_distance) { | ||
417 | + best_distance = tmp_dist; | ||
418 | + best_index = tmp_idx; | ||
419 | + } | ||
420 | + cur = parent; | ||
421 | + } | ||
422 | + *d_distance = sqrt(best_distance); | ||
423 | + *d_index = best_index; | ||
424 | + } | ||
425 | + template <typename T, int D> | ||
426 | + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) { | ||
427 | + size_t idx = blockIdx.x * blockDim.x + threadIdx.x; | ||
428 | + if (idx >= d_query_count) return; // avoid segfault | ||
429 | + | ||
430 | + search<T, D>(nodes, indices, d_reference_points, d_query_points[idx], &d_indices[idx], &d_distances[idx], idx, next_nodes, next_search_nodes, Judge); // every query points are independent | ||
431 | + } | ||
432 | + | ||
433 | + template <typename T, int D = 3> | ||
434 | + class cuda_kdtree { | ||
435 | + protected: | ||
436 | + cuda_kdnode<T> *d_nodes; | ||
437 | + size_t *d_index; | ||
438 | + kdtree::point<T, D>* d_reference_points; | ||
439 | + size_t npts; | ||
440 | + int num_nodes; | ||
441 | + public: | ||
442 | + ~cuda_kdtree() { | ||
443 | + HANDLE_ERROR(cudaFree(d_nodes)); | ||
444 | + HANDLE_ERROR(cudaFree(d_index)); | ||
445 | + HANDLE_ERROR(cudaFree(d_reference_points)); | ||
446 | + } | ||
447 | + | ||
448 | + /// Create a KD-tree given a pointer to an array of reference points and the number of reference points | ||
449 | + /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order | ||
450 | + /// @param reference_count is the number of reference point in the array | ||
451 | + /// @param max_levels is the deepest number of tree levels allowed | ||
452 | + void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) { | ||
453 | + if (max_levels > 10) { | ||
454 | + std::cout<<"The max_tree_levels should be smaller!"<<std::endl; | ||
455 | + exit(1); | ||
456 | + } | ||
457 | + //bb.init(&h_reference_points[0]); | ||
458 | + //aaboundingboxing<T, D>(bb, h_reference_points, reference_count); | ||
459 | + | ||
460 | + std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way | ||
461 | + for (size_t j = 0; j < reference_count; j++) | ||
462 | + for (size_t i = 0; i < D; i++) | ||
463 | + reference_points[j].dim[i] = h_reference_points[j * D + i]; | ||
464 | + cpu_kdtree<T, D> tree; // creating a tree on cpu | ||
465 | + tree.cpu_create(reference_points, max_levels); // building a tree on cpu | ||
466 | + kdtree::kdnode<T> *d_root = tree.get_root(); | ||
467 | + num_nodes = tree.get_num_nodes(); | ||
468 | + npts = reference_count; // also equals to reference_count | ||
469 | + | ||
470 | + HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device | ||
471 | + HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts)); | ||
472 | + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts)); | ||
473 | + | ||
474 | + std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes); | ||
475 | + std::vector <size_t> indices(npts); | ||
476 | + std::vector <kdtree::kdnode<T>*> next_nodes; | ||
477 | + size_t cur_pos = 0; | ||
478 | + next_nodes.push_back(d_root); | ||
479 | + while (next_nodes.size()) { | ||
480 | + std::vector <typename kdtree::kdnode<T>*> next_search_nodes; | ||
481 | + while (next_nodes.size()) { | ||
482 | + kdtree::kdnode<T> *cur = next_nodes.back(); | ||
483 | + next_nodes.pop_back(); | ||
484 | + int id = cur->idx; // the nodes at same level are independent | ||
485 | + tmp_nodes[id].level = cur->level; | ||
486 | + tmp_nodes[id].parent = cur->parent_idx; | ||
487 | + tmp_nodes[id].left = cur->left_idx; | ||
488 | + tmp_nodes[id].right = cur->right_idx; | ||
489 | + tmp_nodes[id].split_value = cur->split_value; | ||
490 | + tmp_nodes[id].num_index = cur->indices.size(); // number of index | ||
491 | + if (cur->indices.size()) { | ||
492 | + for (size_t i = 0; i < cur->indices.size(); i++) | ||
493 | + indices[cur_pos + i] = cur->indices[i]; | ||
494 | + | ||
495 | + tmp_nodes[id].index = (int)cur_pos; // beginning index of reference_points that every bottom node has | ||
496 | + cur_pos += cur->indices.size(); // store indices continuously for every query_point | ||
497 | + } | ||
498 | + else { | ||
499 | + tmp_nodes[id].index = -1; | ||
500 | + } | ||
501 | + | ||
502 | + if (cur->left) | ||
503 | + next_search_nodes.push_back(cur->left); | ||
504 | + | ||
505 | + if (cur->right) | ||
506 | + next_search_nodes.push_back(cur->right); | ||
507 | + } | ||
508 | + next_nodes = next_search_nodes; | ||
509 | + } | ||
510 | + HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice)); | ||
511 | + HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice)); | ||
512 | + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_points.size(), cudaMemcpyHostToDevice)); | ||
513 | + } | ||
514 | + | ||
515 | + /// Search the KD tree for nearest neighbors to a set of specified query points | ||
516 | + /// @param h_query_points an array of query points in (x0, y0, z0, ...) order | ||
517 | + /// @param query_count is the number of query points | ||
518 | + /// @param indices are the indices to the nearest reference point for each query points | ||
519 | + /// @param distances is an array containing the distance between each query point and the nearest reference point | ||
520 | + void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) { | ||
521 | + std::vector < typename kdtree::point<T, D> > query_points(query_count); | ||
522 | + for (size_t j = 0; j < query_count; j++) | ||
523 | + for (size_t i = 0; i < D; i++) | ||
524 | + query_points[j].dim[i] = h_query_points[j * D + i]; | ||
525 | + | ||
526 | + unsigned int threads = (unsigned int)(query_points.size() > 1024 ? 1024 : query_points.size()); | ||
527 | + unsigned int blocks = (unsigned int)(query_points.size() / threads + (query_points.size() % threads ? 1 : 0)); | ||
528 | + | ||
529 | + kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu | ||
530 | + size_t *d_indices; | ||
531 | + T *d_distances; | ||
532 | + | ||
533 | + int *next_nodes; // create two STACK-like array | ||
534 | + int *next_search_nodes; | ||
535 | + | ||
536 | + int *Judge = NULL; // judge variable to see whether one thread is overwrite another thread's memory | ||
537 | + | ||
538 | + HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D)); | ||
539 | + HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size())); | ||
540 | + HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size())); | ||
541 | + HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * stack_size * sizeof(int))); // STACK size right now is 50, you can change it if you mean to | ||
542 | + HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * stack_size * sizeof(int))); | ||
543 | + HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice)); | ||
544 | + | ||
545 | + search_batch<<<blocks, threads>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge); | ||
546 | + | ||
547 | + if (Judge == NULL) { // do the following work if the thread works safely | ||
548 | + HANDLE_ERROR(cudaMemcpy(indices, d_indices, sizeof(size_t) * query_points.size(), cudaMemcpyDeviceToHost)); | ||
549 | + HANDLE_ERROR(cudaMemcpy(distances, d_distances, sizeof(T) * query_points.size(), cudaMemcpyDeviceToHost)); | ||
550 | + } | ||
551 | + | ||
552 | + HANDLE_ERROR(cudaFree(next_nodes)); | ||
553 | + HANDLE_ERROR(cudaFree(next_search_nodes)); | ||
554 | + HANDLE_ERROR(cudaFree(d_query_points)); | ||
555 | + HANDLE_ERROR(cudaFree(d_indices)); | ||
556 | + HANDLE_ERROR(cudaFree(d_distances)); | ||
557 | + } | ||
558 | + | ||
559 | + /// Return the number of points in the KD tree | ||
560 | + size_t num_points() { | ||
561 | + return npts; | ||
562 | + } | ||
563 | + | ||
564 | + stim::aabbn<T, D> getbox() { | ||
565 | + size_t N = npts; | ||
566 | + //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points | ||
567 | + T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points | ||
568 | + HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU | ||
569 | + | ||
570 | + stim::aabbn<T, D> bb(cpu_ref); | ||
571 | + | ||
572 | + for (size_t i = 1; i < N; i++) { //for each reference point | ||
573 | + //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl; | ||
574 | + bb.insert(&cpu_ref[i * D]); | ||
575 | + } | ||
576 | + return bb; | ||
577 | + } | ||
578 | + | ||
579 | + //generate an implicit distance field for the KD-tree | ||
580 | + void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) { | ||
581 | + size_t N = 1; //number of query points that make up the distance field | ||
582 | + for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points | ||
583 | + | ||
584 | + //calculate the grid spatial parameters | ||
585 | + T dx = 0; | ||
586 | + if (dims[0] > 1) dx = bb.length(0) / dims[0]; | ||
587 | + T dy = 0; | ||
588 | + if (dims[1] > 1) dy = bb.length(1) / dims[1]; | ||
589 | + T dz = 0; | ||
590 | + if (dims[2] > 1) dz = bb.length(2) / dims[2]; | ||
591 | + | ||
592 | + T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points | ||
593 | + size_t i; | ||
594 | + for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid) | ||
595 | + for (size_t y = 0; y < dims[1]; y++) { | ||
596 | + for (size_t x = 0; x < dims[0]; x++) { | ||
597 | + i = z * dims[1] * dims[0] + y * dims[0] + x; | ||
598 | + Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2; | ||
599 | + Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2; | ||
600 | + Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2; | ||
601 | + //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl; | ||
602 | + } | ||
603 | + } | ||
604 | + } | ||
605 | + size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused) | ||
606 | + search(Q, N, temp, dist); | ||
607 | + } | ||
608 | + | ||
609 | + //generate an implicit distance field for the KD-tree | ||
610 | + void dist_field3(T* dist, size_t* dims) { | ||
611 | + stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree | ||
612 | + dist_field3(dist, dims, bb); | ||
613 | + } | ||
614 | + | ||
615 | + }; | ||
616 | +} //end namespace stim | ||
617 | +#endif | ||
0 | \ No newline at end of file | 618 | \ No newline at end of file |
1 | +#ifndef STIM_UTIL_FILESIZE_H | ||
2 | +#define STIM_UTIL_FILESIZE_H | ||
3 | + | ||
4 | +#ifdef _WIN32 | ||
5 | +#include <Windows.h> | ||
6 | +#else | ||
7 | +#include <sys/types.h> | ||
8 | +#include <sys/stat.h> | ||
9 | +#endif | ||
10 | + | ||
11 | +namespace stim{ | ||
/// Return the size of a file in bytes.
/// @param filename path of the file to query
/// @return the file size, or 0 if the file cannot be opened or queried
static size_t file_size(std::string filename){
#ifdef _WIN32
	// CreateFileA is called explicitly: filename is a narrow string, and the CreateFile
	// macro expects a wide string when UNICODE is defined
	HANDLE hFile = CreateFileA(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
	if(hFile == INVALID_HANDLE_VALUE) return 0;
	LARGE_INTEGER size;
	const BOOL ok = GetFileSizeEx(hFile, &size);
	CloseHandle(hFile);								// the handle is closed on both the success and the failure path
	return ok ? (size_t)size.QuadPart : 0;
#else
	struct stat sb;
	if(stat(filename.c_str(), &sb) != 0) return 0;	// on failure sb is not filled in, so st_size must not be read
	return (size_t)sb.st_size;
#endif
}
29 | + | ||
30 | +} //end namespace stim | ||
31 | + | ||
32 | + | ||
33 | + | ||
34 | +#endif |
stim/visualization/aabb3.h
@@ -2,51 +2,31 @@ | @@ -2,51 +2,31 @@ | ||
2 | #define STIM_AABB3_H | 2 | #define STIM_AABB3_H |
3 | 3 | ||
4 | #include <stim/cuda/cudatools/callable.h> | 4 | #include <stim/cuda/cudatools/callable.h> |
5 | +#include <stim/visualization/aabbn.h> | ||
5 | 6 | ||
6 | namespace stim{ | 7 | namespace stim{ |
7 | 8 | ||
8 | -/// Structure for a 3D axis aligned bounding box | 9 | + template<typename T> |
10 | + using aabb3 = aabbn<T, 3>; | ||
11 | +/*/// Structure for a 3D axis aligned bounding box | ||
9 | template<typename T> | 12 | template<typename T> |
10 | -struct aabb3{ | ||
11 | - | ||
12 | -//protected: | ||
13 | - | ||
14 | - T low[3]; //top left corner position | ||
15 | - T high[3]; //dimensions along x and y and z | ||
16 | - | ||
17 | -//public: | ||
18 | - | ||
19 | - CUDA_CALLABLE aabb3(T x, T y, T z){ //initialize an axis aligned bounding box of size 0 at the given position | ||
20 | - low[0] = high[0] = x; //set the position to the user specified coordinates | ||
21 | - low[1] = high[1] = y; | ||
22 | - low[2] = high[2] = z; | 13 | +struct aabb3 : public aabbn<T, 3>{ |
14 | + | ||
15 | + aabb3() : aabbn() {} | ||
16 | + aabb3(T x0, T y0, T z0, T x1, T y1, T z1){ | ||
17 | + low[0] = x0; | ||
18 | + low[1] = y0; | ||
19 | + low[2] = z0; | ||
20 | + high[0] = x0; | ||
21 | + high[1] = x1; | ||
22 | + high[2] = x2; | ||
23 | } | 23 | } |
24 | 24 | ||
25 | - //insert a point into the bounding box, growing the box appropriately | ||
26 | - CUDA_CALLABLE void insert(T x, T y, T z){ | ||
27 | - if(x < low[0]) low[0] = x; | ||
28 | - if(y < low[1]) low[1] = y; | ||
29 | - if(z < low[2]) low[2] = z; | ||
30 | - | ||
31 | - if(x > high[0]) high[0] = x; | ||
32 | - if(y > high[1]) high[1] = y; | ||
33 | - if(z > high[2]) high[2] = z; | ||
34 | - } | ||
35 | - | ||
36 | - //trim the bounding box so that the lower bounds are (x, y, z) | ||
37 | - CUDA_CALLABLE void trim_low(T x, T y, T z){ | ||
38 | - if(low[0] < x) low[0] = x; | ||
39 | - if(low[1] < y) low[1] = y; | ||
40 | - if(low[2] < z) low[2] = z; | ||
41 | - } | 25 | + aabb3 aabbn<T, 3>() { |
42 | 26 | ||
43 | - CUDA_CALLABLE void trim_high(T x, T y, T z){ | ||
44 | - if(high[0] > x) high[0] = x; | ||
45 | - if(high[1] > y) high[1] = y; | ||
46 | - if(high[2] > z) high[2] = z; | ||
47 | } | 27 | } |
48 | 28 | ||
49 | -}; | 29 | +};*/ |
50 | 30 | ||
51 | } | 31 | } |
52 | 32 |
1 | +#ifndef STIM_AABBN_H | ||
2 | +#define STIM_AABBN_H | ||
3 | + | ||
4 | +#include <vector> | ||
5 | +#include <stim/cuda/cudatools/callable.h> | ||
6 | + | ||
7 | +namespace stim{ | ||
8 | + | ||
/// Structure for an N-dimensional axis-aligned bounding box.
/// T is the coordinate type and D is the number of dimensions.
template<typename T, size_t D>
struct aabbn{

//protected:

	T low[D];				//lower bound of the box along each dimension
	T high[D];				//upper bound of the box along each dimension

	/// Collapse the box to a single point: both bounds are set to i[0..D-1]
	CUDA_CALLABLE void init(const T* i) {
		for (size_t d = 0; d < D; d++)
			low[d] = high[d] = i[d];
	}

	/// Default constructor: low and high are left unset
	CUDA_CALLABLE aabbn() {}

	/// Create a box of size 0 at the point i (an array of D coordinates)
	CUDA_CALLABLE aabbn(const T* i) {
		init(i);
	}

	/// Set the bounds of the first dimension to [x0, x1]; other dimensions are left unset
	CUDA_CALLABLE aabbn(T x0, T x1) {
		low[0] = x0;
		high[0] = x1;
	}

	/// Set the bounds of the first two dimensions: low = (x0, y0), high = (x1, y1)
	CUDA_CALLABLE aabbn(T x0, T y0, T x1, T y1) : aabbn(x0, x1) {
		static_assert(D >= 2, "aabbn: this constructor requires at least 2 dimensions");
		low[1] = y0;
		high[1] = y1;
	}

	/// Set the bounds of the first three dimensions: low = (x0, y0, z0), high = (x1, y1, z1)
	CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) : aabbn(x0, y0, x1, y1) {
		static_assert(D >= 3, "aabbn: this constructor requires at least 3 dimensions");
		low[2] = z0;
		high[2] = z1;
	}


	//insert a point into the bounding box, growing the box appropriately
	CUDA_CALLABLE void insert(const T* p){
		for(size_t d = 0; d < D; d++){
			if(p[d] < low[d]) low[d] = p[d];
			if(p[d] > high[d]) high[d] = p[d];
		}
	}

	//trim the bounding box so that the lower bounds are no smaller than b(x, y, z, ...)
	CUDA_CALLABLE void trim_low(const T* b){
		for(size_t d = 0; d < D; d++)
			if(low[d] < b[d]) low[d] = b[d];
	}

	//trim the bounding box so that the upper bounds are no larger than b(x, y, z, ...)
	CUDA_CALLABLE void trim_high(const T* b){
		for(size_t d = 0; d < D; d++)
			if(high[d] > b[d]) high[d] = b[d];		//clamp the UPPER bound (the lower bound is handled by trim_low)
	}

	//extent of the box along dimension d
	CUDA_CALLABLE T length(size_t d) const {
		return high[d] - low[d];
	}

	//return a copy of the box scaled by s about its center
	CUDA_CALLABLE aabbn<T, D> operator*(T s) const {
		aabbn<T, D> newbox;
		for (size_t d = 0; d < D; d++) {
			T c = (low[d] + high[d]) / 2;			//center along this dimension
			T l = high[d] - low[d];					//current extent along this dimension
			newbox.low[d] = c - l * s / 2;
			newbox.high[d] = c + l * s / 2;
		}
		return newbox;
	}

	//translate the box along dimension d a distance of v
	CUDA_CALLABLE void translate(size_t d, T v) {
		low[d] += v;							//only the requested dimension moves
		high[d] += v;
	}

};
87 | + | ||
88 | +} | ||
89 | + | ||
90 | + | ||
91 | +#endif | ||
0 | \ No newline at end of file | 92 | \ No newline at end of file |
stim/visualization/cylinder.h
@@ -4,6 +4,9 @@ | @@ -4,6 +4,9 @@ | ||
4 | #include <stim/math/circle.h> | 4 | #include <stim/math/circle.h> |
5 | #include <stim/biomodels/centerline.h> | 5 | #include <stim/biomodels/centerline.h> |
6 | 6 | ||
7 | +/* | ||
8 | + | ||
9 | +*/ | ||
7 | 10 | ||
8 | namespace stim | 11 | namespace stim |
9 | { | 12 | { |
@@ -12,13 +15,13 @@ class cylinder | @@ -12,13 +15,13 @@ class cylinder | ||
12 | : public centerline<T> | 15 | : public centerline<T> |
13 | { | 16 | { |
14 | private: | 17 | private: |
15 | - stim::circle<T> s; //an arbitrary circle | ||
16 | - std::vector<stim::circle<T> > e; //an array of circles that store the centerline | 18 | + stim::circle<T> s; //an arbitrary circle |
19 | + std::vector<stim::circle<T> > e; //an array of circles that store the centerline | ||
17 | 20 | ||
18 | std::vector<stim::vec3<T> > norms; | 21 | std::vector<stim::vec3<T> > norms; |
19 | std::vector<stim::vec<T> > Us; | 22 | std::vector<stim::vec<T> > Us; |
20 | - std::vector<stim::vec<T> > mags; | ||
21 | - std::vector< T > L; //length of the cylinder at each position. | 23 | + std::vector<stim::vec<T> > mags; //stores a list of magnitudes for each point in the centerline (assuming mags[0] is the radius) |
24 | + std::vector< T > L; //length of the cylinder at each position (pre-integration) | ||
22 | 25 | ||
23 | 26 | ||
24 | using stim::centerline<T>::c; | 27 | using stim::centerline<T>::c; |
@@ -61,9 +64,9 @@ class cylinder | @@ -61,9 +64,9 @@ class cylinder | ||
61 | return; | 64 | return; |
62 | 65 | ||
63 | //calculate each L. | 66 | //calculate each L. |
64 | - L.resize(inP.size()); | ||
65 | - T temp = (T)0; | ||
66 | - L[0] = 0; | 67 | + L.resize(inP.size()); //the number of precomputed lengths will equal the number of points |
68 | + T temp = (T)0; //length up to that point | ||
69 | + L[0] = temp; | ||
67 | for(size_t i = 1; i < L.size(); i++) | 70 | for(size_t i = 1; i < L.size(); i++) |
68 | { | 71 | { |
69 | temp += (inP[i-1] - inP[i]).len(); | 72 | temp += (inP[i-1] - inP[i]).len(); |
@@ -234,7 +237,7 @@ class cylinder | @@ -234,7 +237,7 @@ class cylinder | ||
234 | cylinder(std::vector< stim::vec3<T> > inP) | 237 | cylinder(std::vector< stim::vec3<T> > inP) |
235 | : centerline<T>(inP) | 238 | : centerline<T>(inP) |
236 | { | 239 | { |
237 | - std::vector< T > inM; //create an array of arbitrary magnitudes | 240 | + std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes |
238 | 241 | ||
239 | stim::vec<T> zero; | 242 | stim::vec<T> zero; |
240 | zero.push_back(0); | 243 | zero.push_back(0); |
@@ -476,30 +479,30 @@ class cylinder | @@ -476,30 +479,30 @@ class cylinder | ||
476 | 479 | ||
477 | std::vector< vec3<T> > result; | 480 | std::vector< vec3<T> > result; |
478 | 481 | ||
479 | - vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline | 482 | + vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline |
480 | vec3<T> p1; | 483 | vec3<T> p1; |
481 | - unsigned N = size(); //number of points in the current centerline | 484 | + unsigned N = size(); //number of points in the current centerline |
482 | 485 | ||
483 | //for each line segment on the centerline | 486 | //for each line segment on the centerline |
484 | for(unsigned int i = 1; i < N; i++){ | 487 | for(unsigned int i = 1; i < N; i++){ |
485 | - p1 = e[i].P; //get the second point in the line segment | 488 | + p1 = e[i].P; //get the second point in the line segment |
486 | 489 | ||
487 | - vec3<T> v = p1 - p0; //calculate the vector between these two points | ||
488 | - T d = v.len(); //calculate the distance between these two points (length of the line segment) | 490 | + vec3<T> v = p1 - p0; //calculate the vector between these two points |
491 | + T d = v.len(); //calculate the distance between these two points (length of the line segment) | ||
489 | 492 | ||
490 | size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria | 493 | size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria |
491 | - T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points | 494 | + T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points |
492 | 495 | ||
493 | //for each step along the line segment | 496 | //for each step along the line segment |
494 | for(unsigned s = 0; s < nsteps; s++){ | 497 | for(unsigned s = 0; s < nsteps; s++){ |
495 | - T alpha = stepsize * s; //calculate the fraction of the distance along the line segment covered | ||
496 | - result.push_back(p0 + alpha * v); //push the point at alpha position along the line segment | 498 | + T alpha = stepsize * s; //calculate the fraction of the distance along the line segment covered |
499 | + result.push_back(p0 + alpha * v); //push the point at alpha position along the line segment | ||
497 | } | 500 | } |
498 | 501 | ||
499 | - p0 = p1; //shift the points to move to the next line segment | 502 | + p0 = p1; //shift the points to move to the next line segment |
500 | } | 503 | } |
501 | 504 | ||
502 | - result.push_back(e[size() - 1].P); //push the last point in the centerline | 505 | + result.push_back(e[size() - 1].P); //push the last point in the centerline |
503 | 506 | ||
504 | return cylinder<T>(result); | 507 | return cylinder<T>(result); |
505 | 508 |