Commit 39a92d0390bbd9240b2493d5a98afd9f17f0633b
Merge branch 'master' of git.stim.ee.uh.edu:codebase/stimlib into Graph
Showing
51 changed files
with
5615 additions
and
339 deletions
Show diff stats
1 | +# | |
2 | +# Windows users: define the FANN_PATH environment variable to point | |
3 | +# to the directory containing: | |
4 | +# include/fann.h | |
5 | +# lib/*fann.lib | |
6 | + | |
7 | + | |
8 | +# FANN_FOUND - system has fann | |
9 | +# FANN_INCLUDE_DIRS - the fann include directory | |
10 | +# FANN_LIBRARIES - Link these to use fann | |
11 | +# FANN_DEFINITIONS - Compiler switches required for using fann | |
12 | +# | |
13 | + | |
# Locate the FANN headers and libraries.
#
# Search hints: the FANN_PATH environment variable and the FANN_DIR CMake
# variable, followed by a handful of common Unix install prefixes.
# If FANN_LIBRARIES and FANN_INCLUDE_DIRS are both already set (by the caller
# or by a previous pass), they are trusted and the search is skipped.
if(FANN_LIBRARIES AND FANN_INCLUDE_DIRS)
    set(FANN_FOUND TRUE)
else()
    find_path(FANN_INCLUDE_DIR
        NAMES
            fann.h
        PATHS
            $ENV{FANN_PATH}/include
            ${FANN_DIR}/include
            /usr/include
            /usr/local/include
            /opt/local/include
            /sw/include
    )

    # Look for every FANN variant, but keep only the ones that were actually
    # located so no "<NAME>_LIBRARY-NOTFOUND" placeholder reaches the link line.
    set(_fann_found_libraries)
    foreach(_lib fann doublefann fixedfann floatfann)
        string(TOUPPER ${_lib} _name)

        find_library(${_name}_LIBRARY
            NAMES
                ${_lib}
            PATHS
                $ENV{FANN_PATH}/lib
                ${FANN_DIR}/lib
                /usr/lib
                /usr/local/lib
                /opt/local/lib
                /sw/lib
        )

        if(${_name}_LIBRARY)
            list(APPEND _fann_found_libraries ${${_name}_LIBRARY})
        endif()
    endforeach()

    set(FANN_INCLUDE_DIRS ${FANN_INCLUDE_DIR})
    set(FANN_LIBRARIES ${FANN_LIBRARIES} ${_fann_found_libraries})
    unset(_fann_found_libraries)

    # Decide success BEFORE appending the math library: a list that ends in
    # "m" is always truthy and would hide a failed library search.
    if(FANN_INCLUDE_DIR AND FANN_LIBRARIES)
        set(FANN_FOUND TRUE)
        if(UNIX)
            list(APPEND FANN_LIBRARIES m)
        endif()
    else()
        set(FANN_FOUND FALSE)
    endif()

    if(FANN_FOUND)
        if(NOT FANN_FIND_QUIETLY)
            message(STATUS "Found FANN:")
            message(STATUS "FANN_INCLUDE_DIRS: ${FANN_INCLUDE_DIRS}")
            message(STATUS "FANN_LIBRARIES: ${FANN_LIBRARIES}")
        endif()
    else()
        if(FANN_FIND_REQUIRED)
            message(FATAL_ERROR "Could not find FANN")
        endif()
    endif()

    # Hide the cache entries created by find_path/find_library above.
    mark_as_advanced(
        FANN_INCLUDE_DIR
        FANN_LIBRARY
        DOUBLEFANN_LIBRARY
        FIXEDFANN_LIBRARY
        FLOATFANN_LIBRARY
    )
endif()
1 | +# Copyright (c) 2012-2016 DreamWorks Animation LLC | |
2 | +# | |
3 | +# All rights reserved. This software is distributed under the | |
4 | +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ ) | |
5 | +# | |
6 | +# Redistributions of source code must retain the above copyright | |
7 | +# and license notice and the following restrictions and disclaimer. | |
8 | +# | |
9 | +# * Neither the name of DreamWorks Animation nor the names of | |
10 | +# its contributors may be used to endorse or promote products derived | |
11 | +# from this software without specific prior written permission. | |
12 | +# | |
13 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
14 | +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
15 | +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
16 | +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
17 | +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL, | |
18 | +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
19 | +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
20 | +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
21 | +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE | |
25 | +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00. | |
26 | +# | |
27 | + | |
28 | +#-*-cmake-*- | |
29 | +# - Find GLEW | |
30 | +# | |
31 | +# Author : Nicholas Yue yue.nicholas@gmail.com | |
32 | +# | |
33 | +# This auxiliary CMake file helps find the GLEW headers and libraries | |
34 | +# | |
35 | +# GLEW_FOUND set if Glew is found. | |
36 | +# GLEW_INCLUDE_DIR GLEW's include directory | |
37 | +# GLEW_GLEW_LIBRARY GLEW libraries | |
38 | +# GLEW_GLEWmx_LIBRARY GLEWmx libraries (Multiple Rendering Context) | |
39 | + | |
# find_package_handle_standard_args() lives in a helper module, not a package:
# load it with include() rather than find_package().
include(FindPackageHandleStandardArgs)

# GLEW is searched for ONLY below the GLEW_ROOT environment variable.
find_path(GLEW_LOCATION include/GL/glew.h
    "$ENV{GLEW_ROOT}"
    NO_DEFAULT_PATH
    NO_SYSTEM_ENVIRONMENT_PATH
)

# NOTE(review): GLEW_FOUND reflects only the header location; the libraries
# are searched afterwards and are not part of REQUIRED_VARS. Kept as-is so
# existing callers see the same result - confirm whether that is intended.
find_package_handle_standard_args(GLEW
    REQUIRED_VARS GLEW_LOCATION
)

if(GLEW_LOCATION)

    set(GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE PATH "GLEW include path")

    # The suffix list is narrowed per platform below and restored afterwards.
    set(ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})

    if(GLEW_USE_STATIC_LIBS)
        if(WIN32)
            # Static import libraries carry an "S" suffix in the name
            set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
            find_library(GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib)
            find_library(GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib)
        else()
            # macOS and other Unices: static archives, GLEW_ROOT only
            set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
            find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
                NO_DEFAULT_PATH
                NO_SYSTEM_ENVIRONMENT_PATH
            )
            find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
                NO_DEFAULT_PATH
                NO_SYSTEM_ENVIRONMENT_PATH
            )
        endif()
    else()
        if(APPLE)
            set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
            find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib)
            find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib)
        elseif(WIN32)
            # Link library
            set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
            find_library(GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib)
            find_library(GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib)
            # Load library
            set(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
            find_library(GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin
                NO_DEFAULT_PATH
                NO_SYSTEM_ENVIRONMENT_PATH
            )
            find_library(GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin
                NO_DEFAULT_PATH
                NO_SYSTEM_ENVIRONMENT_PATH
            )
        else()
            # Unices
            find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
                NO_DEFAULT_PATH
                NO_SYSTEM_ENVIRONMENT_PATH
            )
            find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
                NO_DEFAULT_PATH
                NO_SYSTEM_ENVIRONMENT_PATH
            )
        endif()
    endif()

    # MUST reset, otherwise later find_library() calls inherit the narrowed list
    set(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})

    set(GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE FILEPATH "GLEW library")
    set(GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE FILEPATH "GLEWmx library")

endif()
1 | +#.rst: | |
2 | +# FindGLUT | |
3 | +# -------- | |
4 | +# | |
5 | +# try to find glut library and include files. | |
6 | +# | |
7 | +# IMPORTED Targets | |
8 | +# ^^^^^^^^^^^^^^^^ | |
9 | +# | |
10 | +# This module defines the :prop_tgt:`IMPORTED` targets: | |
11 | +# | |
12 | +# ``GLUT::GLUT`` | |
13 | +# Defined if the system has GLUT. | |
14 | +# | |
15 | +# Result Variables | |
16 | +# ^^^^^^^^^^^^^^^^ | |
17 | +# | |
18 | +# This module sets the following variables: | |
19 | +# | |
20 | +# :: | |
21 | +# | |
22 | +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc. | |
23 | +# GLUT_LIBRARIES, the libraries to link against | |
24 | +# GLUT_FOUND, If false, do not try to use GLUT. | |
25 | +# | |
26 | +# Also defined, but not for general use are: | |
27 | +# | |
28 | +# :: | |
29 | +# | |
30 | +# GLUT_glut_LIBRARY = the full path to the glut library. | |
31 | +# GLUT_Xmu_LIBRARY = the full path to the Xmu library. | |
32 | +# GLUT_Xi_LIBRARY = the full path to the Xi Library. | |
33 | + | |
34 | +#============================================================================= | |
35 | +# Copyright 2001-2009 Kitware, Inc. | |
36 | +# | |
37 | +# Distributed under the OSI-approved BSD License (the "License"); | |
38 | +# see accompanying file Copyright.txt for details. | |
39 | +# | |
40 | +# This software is distributed WITHOUT ANY WARRANTY; without even the | |
41 | +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
42 | +# See the License for more information. | |
43 | +#============================================================================= | |
44 | +# (To distribute this file outside of CMake, substitute the full | |
45 | +# License text for the above reference.) | |
46 | + | |
# FIND_PACKAGE_HANDLE_STANDARD_ARGS is called further down; load its module
# here so this file does not depend on some other find-module having done so.
include(FindPackageHandleStandardArgs)

if(WIN32)
    # Windows: GLUT_ROOT_PATH (environment) points at the GLUT/freeglut install
    find_path(GLUT_INCLUDE_DIR NAMES GL/glut.h
        PATHS $ENV{GLUT_ROOT_PATH}/include)

    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
        # 64-bit build: only freeglut from the x64 sub-directory is accepted
        find_library(GLUT_glut_LIBRARY NAMES freeglut
            PATHS
                $ENV{GLUT_ROOT_PATH}/lib/x64
            NO_DEFAULT_PATH
        )
    else()
        find_library(GLUT_glut_LIBRARY NAMES glut glut32 freeglut
            PATHS
                ${OPENGL_LIBRARY_DIR}
                $ENV{GLUT_ROOT_PATH}/lib
        )
    endif()

elseif(APPLE)
    find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
    find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
    find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")

    if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
        add_library(GLUT::Cocoa UNKNOWN IMPORTED)
        # Cocoa should always be a Framework, but we check to make sure.
        if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
            set_target_properties(GLUT::Cocoa PROPERTIES
                IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
        else()
            set_target_properties(GLUT::Cocoa PROPERTIES
                IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}")
        endif()
    endif()

else()
    if(BEOS)
        # BeOS ships GLUT in fixed system directories; pass them as extra hints
        set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
        set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
    else()
        # Other Unices: GLUT may additionally need the X input/utility libraries
        find_library(GLUT_Xi_LIBRARY Xi
            /usr/openwin/lib
        )

        find_library(GLUT_Xmu_LIBRARY Xmu
            /usr/openwin/lib
        )

        if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi)
            add_library(GLUT::Xi UNKNOWN IMPORTED)
            set_target_properties(GLUT::Xi PROPERTIES
                IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}")
        endif()

        if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu)
            add_library(GLUT::Xmu UNKNOWN IMPORTED)
            set_target_properties(GLUT::Xmu PROPERTIES
                IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}")
        endif()
    endif()

    find_path(GLUT_INCLUDE_DIR GL/glut.h
        /usr/include/GL
        /usr/openwin/share/include
        /usr/openwin/include
        /opt/graphics/OpenGL/include
        /opt/graphics/OpenGL/contrib/libglut
        ${_GLUT_INC_DIR}
    )

    find_library(GLUT_glut_LIBRARY glut
        /usr/openwin/lib
        ${_GLUT_glut_LIB_DIR}
    )

    unset(_GLUT_INC_DIR)
    unset(_GLUT_glut_LIB_DIR)
endif()

FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)

if(GLUT_FOUND)
    # Is -lXi and -lXmu required on all platforms that have it?
    # If not, we need some way to figure out what platform we are on.
    set(GLUT_LIBRARIES
        ${GLUT_glut_LIBRARY}
        ${GLUT_Xmu_LIBRARY}
        ${GLUT_Xi_LIBRARY}
        ${GLUT_cocoa_LIBRARY}
    )

    if(NOT TARGET GLUT::GLUT)
        add_library(GLUT::GLUT UNKNOWN IMPORTED)
        set_target_properties(GLUT::GLUT PROPERTIES
            INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")
        # A macOS framework is imported through the binary inside the bundle
        if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$")
            set_target_properties(GLUT::GLUT PROPERTIES
                IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
        else()
            set_target_properties(GLUT::GLUT PROPERTIES
                IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
        endif()

        # Chain whichever helper targets were created above
        if(TARGET GLUT::Xmu)
            set_property(TARGET GLUT::GLUT APPEND
                PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu)
        endif()

        if(TARGET GLUT::Xi)
            set_property(TARGET GLUT::GLUT APPEND
                PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi)
        endif()

        if(TARGET GLUT::Cocoa)
            set_property(TARGET GLUT::GLUT APPEND
                PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa)
        endif()
    endif()

    #The following deprecated settings are for backwards compatibility with CMake1.4
    set(GLUT_LIBRARY ${GLUT_LIBRARIES})
    set(GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
endif()

mark_as_advanced(
    GLUT_INCLUDE_DIR
    GLUT_glut_LIBRARY
    GLUT_Xmu_LIBRARY
    GLUT_Xi_LIBRARY
    GLUT_cocoa_LIBRARY
)
cmake/FindSTIM.cmake
1 | -include(FindPackageHandleStandardArgs) | |
2 | - | |
3 | -set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH}) | |
4 | - | |
5 | -find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) | |
6 | - | |
7 | -if(STIM_FOUND) | |
8 | - set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR}) | |
9 | -endif() | |
10 | 1 | \ No newline at end of file |
# Finds the STIM library headers.
# Set the STIMLIB_PATH environment variable to the directory containing the
# stim subdirectory (the stim repository).
#
# Result variables:
#   STIM_FOUND        - TRUE when STIMLIB_PATH is set
#   STIM_INCLUDE_DIRS - include directory to add for STIM (set only when found)

include(FindPackageHandleStandardArgs)

set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})

# Print the hint BEFORE the standard-args call: with REQUIRED that call aborts
# the configure step, so anything placed after it would never be shown.
if(NOT STIM_INCLUDE_DIR)
    # Automatic download is currently disabled:
    #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib)				#remove the stimlib directory if it exists
    #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
    #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
    #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
    message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
    message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
endif()

find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)

if(STIM_FOUND)
    set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
endif()
function T = bsq2tensorflow(I, n)
%BSQ2TENSORFLOW  Cut a 3D array into n equal slabs along dimension 2 and
%   store each slab, flattened in column-major order, as one column of T.
%
%   I - 3D array; size(I, 2) is expected to be a multiple of n
%   n - number of slabs
%   T - matrix with one column per slab

    rows  = size(I, 1);
    width = size(I, 2) / n;         %extent of a single slab along Y
    bands = size(I, 3);

    T = zeros(rows * width * bands, n);     %one column per slab
    for k = 1:n
        first = (k - 1) * width + 1;        %first Y index of slab k
        last  = k * width;                  %last Y index of slab k
        slab  = I(:, first:last, :);
        T(:, k) = slab(:);
    end
end
13 | + | |
14 | + | |
0 | 15 | \ No newline at end of file | ... | ... |
matlab/enviLoadRaw.m
matlab/enviSaveRaw.m
1 | +% Read images of TIFF, SPE2.2(WinSpec) and SPE3.0(Lightfield) | |
2 | +% Version: JTL Jun-9-2016 | |
3 | +% ----------------- READ THIS FIRST !!!!! -------------------------------- | |
4 | +% Change the file name to "readspe" before use | |
5 | +% Example: | |
6 | +% Z = readspe(filename) | |
7 | +% Z = readspe(filename,'info') | |
8 | +% Z = readspe(filename,frame_index) | |
9 | +% Z = readspe(filename,frame_index,'info') | |
10 | +% Input: | |
11 | +% filename - filename string, e.g. 'image.spe' | |
12 | +% frame_index - frame index, start from 1 | |
13 | +% If you have multiple frames, use a "for" loop | |
14 | +% 'info' - flag to show file info, i.e. dimension, number of frames, version | |
15 | +% Output: | |
16 | +% Z - UINT16 image (convert to double if you need) | |
17 | +% ------------------------------------------------------------------------ | |
18 | +% Z = readspe (filename,frame_index,'info') | |
function Z = readspe (filename,varargin)
% Read one frame of a TIFF or SPE image file.
%   filename      - path of the file to read
%   varargin{1}   - optional numeric frame index (1-based, default 1); used
%                   for SPE files only
%   varargin{end} - optional flag 'info': print dimensions, frame count
%                   (SPE only) and file version
%   Z             - image in the data type stored in the file

% the 'file' option avoids a false hit when filename matches a workspace variable
if exist(filename, 'file') ~= 2
    display('File does not exist!');
    return;     % as before, Z is left unassigned when there is nothing to read
end

Nfr = 1;        % default read first frame
if nargin > 1 && isnumeric(varargin{1})
    Nfr = varargin{1};
end

% strcmp is safe for any argument type/length; == on char arrays is not
showInfo = nargin > 1 && ischar(varargin{end}) && strcmp(varargin{end}, 'info');

[~, ~, ext] = fileparts(filename);
switch upper(ext)
    case {'.TIF', '.TIFF'}
        file_ver = 'TIFF';
        Z = imread(filename);
        [Y, X] = size(Z);

    case '.SPE'
        fid = fopen(filename);
        if fid < 0
            error('readspe:openFailed', 'Unable to open file "%s".', filename);
        end
        I = fread(fid, Inf, 'uint8');
        fclose(fid);    % the whole file is in memory; release the handle before parsing

        % header fields, read at fixed (1-based) byte positions
        X = double(typecast(uint8(I(43:44)), 'uint16'));
        Y = double(typecast(uint8(I(657:658)), 'uint16'));
        fr = typecast(uint8(I(1447:1450)), 'int32');
        spe_ver = typecast(uint8(I(1993:1996)), 'single');
        file_ver = ['SPE ' num2str(spe_ver)];
        datatypeN = typecast(uint8(I(109:110)), 'int16');
        switch datatypeN
            case 0      % 32-bit float
                datatype = 'single'; datalength = 4;
            case 1      % 32-bit signed integer
                datatype = 'int32'; datalength = 4;
            case 2      % 16-bit signed integer
                datatype = 'int16'; datalength = 2;
            case 3      % 16-bit unsigned integer
                datatype = 'uint16'; datalength = 2;
            case 8      % 32-bit unsigned integer
                datatype = 'uint32'; datalength = 4;
            otherwise
                error('readspe:unsupportedDataType', ...
                      'Unsupported SPE data type code: %d', datatypeN);
        end

        % pixel data starts after a 4100-byte header; frames are stored back to back
        frameBytes = X * Y * datalength;
        A = I(4101 + frameBytes * (Nfr - 1) : 4100 + frameBytes * Nfr);
        B = typecast(uint8(A), datatype);   % important
        Z = reshape(B, X, Y);
        Z = Z';

    otherwise
        error('readspe:unsupportedFormat', 'Unsupported file extension "%s".', ext);
end

if showInfo
    display(['X = ' num2str(X)]);
    display(['Y = ' num2str(Y)]);
    if exist('fr', 'var')
        display(['Number of Frames: ' num2str(fr)]);
    end
    display(['File version: ' file_ver]);
end
0 | 79 | \ No newline at end of file | ... | ... |
function spe2envi(filemask, outfile)
%SPE2ENVI  Assemble a set of SPE files into one cube and save it with enviSaveRaw.
%   filemask - file mask passed to dir(), e.g. 'scan/*.spe'
%   outfile  - output file name; the header is written to [outfile '.hdr']
%
%   Each file contributes one line (index y) of the cube. Files are ordered
%   by the date number reported by dir().

    filelist = dir(filemask);
    filelist = filelist(~[filelist.isdir]);     %ignore directory entries
    if isempty(filelist)
        error('spe2envi:noFiles', 'No files match "%s".', filemask);
    end

    %dir() returns bare file names, so recover the folder from the mask
    folder = fileparts(filemask);

    %sort the file order based on the file date numbers
    %NOTE(review): assumes the file time stamp reflects acquisition order - confirm
    [~, id] = sort([filelist.datenum]);

    %get the number of files
    Y = length(id);                             %size of the image along Y

    %load the first file to determine the spectral and X-axis size
    temp = readspe(fullfile(folder, filelist(1).name));
    X = size(temp, 1);                          %size of the image along X
    B = size(temp, 2);                          %number of bands in the image

    %create the cube
    I = zeros(X, Y, B);

    %for each line
    for y = 1:Y

        %read a SPE file
        img = readspe(fullfile(folder, filelist(id(y)).name));

        %an X-by-B frame becomes the X-by-1-by-B slice at line y
        I(:, y, :) = permute(img, [1 3 2]);
    end

    enviSaveRaw(single(I), outfile, [outfile '.hdr']);
32 | + | |
33 | + | |
34 | + | ... | ... |
matlab/brewermap.m renamed to matlab/stimBrewerMap.m
1 | +%Loads a standard Agilent ResPro binary file | |
2 | +% stimLoadAgilent(filename) | |
function S = stimLoadAgilent(filename)
% Reads the dimensions from fixed header offsets, then the float32 samples
% that start at byte 1020, and returns them as an X-by-Y-by-Z array.

    fid = fopen(filename);
    if fid < 0
        error('stimLoadAgilent:openFailed', 'Unable to open file "%s".', filename);
    end
    closer = onCleanup(@() fclose(fid));    %close the file on every exit path

    fseek(fid, 9, 'bof');
    Z = fread(fid, 1, 'uint16');            %size along the third dimension
    fseek(fid, 13, 'cof');                  %skip ahead to the X/Y sizes
    X = fread(fid, 1, 'uint16');
    Y = fread(fid, 1, 'uint16');

    fseek(fid, 1020, 'bof');                %start of the sample data

    S = reshape(fread(fid, [X, Y * Z], 'float32'), [X, Y, Z]);
15 | + | |
16 | + | |
0 | 17 | \ No newline at end of file | ... | ... |
function [TPR, FPR, AUC] = stimROC(C, T)
%build an ROC curve
%   C - class labels as an array of binary values (nonzero = positive)
%   T - one value per observation; observations are ranked by sorting T in
%       descending order
%
%   TPR, FPR - 1-by-M row vectors: the true/false positive rates after the
%              first i ranked observations are accepted
%   AUC      - area under the curve (rectangle rule over the FPR steps)

    %sort the thresholds in descending order and get the indices
    [~, I] = sort(T, 'descend');

    %sort the class labels in the same order as the thresholds
    Cs = C(I);

    %calculate the number of measurements (works for row and column vectors)
    M = numel(C);

    %calculate the number of positives
    P = nnz(C);

    %calculate the number of negatives
    N = M - P;

    %both classes must be present, otherwise one of the rates is undefined
    if P == M
        error('ERROR: no negative observations');
    end
    if P == 0
        error('ERROR: no positive observations');
    end

    %running counts of positives/negatives, normalized to rates
    %(labels are tested against zero so the counts agree with nnz above)
    isPos = reshape(Cs ~= 0, 1, []);
    TPR = cumsum(isPos) / P;
    FPR = cumsum(~isPos) / N;

    %calculate the area under the ROC curve: each FPR step contributes a
    %rectangle as tall as the TPR reached at that step
    AUC = sum(diff(FPR) .* TPR(2:end));
64 | + | |
65 | + | |
66 | + | |
67 | + | |
68 | + | |
69 | + | |
0 | 70 | \ No newline at end of file | ... | ... |
#!/usr/bin/python3
"""Run the hsiproc baseline step on a hyperspectral file.

Usage: <script> INFILE

Invokes ``hsiproc INFILE INFILE-base --baseline baseline.txt``.
"""

#import system processes
import subprocess, sys


def build_baseline_command(infile):
    """Return the argv list that baseline-corrects `infile` into `infile`-base."""
    basefile = infile + "-base"
    return ["hsiproc", infile, basefile, "--baseline", "baseline.txt"]


def main(argv):
    """Validate the command line, run hsiproc and return its exit status."""
    if len(argv) < 2:
        print("usage: " + argv[0] + " INFILE", file=sys.stderr)
        return 1

    # the file name stays a string: it is used to build the output names
    infile = argv[1]

    # NOTE(review): the original also defined an "<infile>-norm" name that was
    # never used - presumably a normalization step is still to be added.

    # an argument list (no shell) keeps spaces and shell metacharacters in the
    # file name from being interpreted
    return subprocess.call(build_baseline_command(infile))


if __name__ == "__main__":
    sys.exit(main(sys.argv))
0 | 14 | \ No newline at end of file | ... | ... |
stim/biomodels/cellset.h
... | ... | @@ -117,7 +117,7 @@ public: |
117 | 117 | } |
118 | 118 | |
119 | 119 | /// Return the maximum value of a field in this cell set |
120 | - double max(std::string field){ | |
120 | + double maximum(std::string field){ | |
121 | 121 | size_t idx = fields[field]; //get the field index |
122 | 122 | size_t ncells = cells.size(); //get the total number of cells |
123 | 123 | double maxval, val; //stores the current and maximum values |
... | ... | @@ -130,7 +130,7 @@ public: |
130 | 130 | } |
131 | 131 | |
132 | 132 | /// Return the minimum value of a field in this cell set
133 | - double min(std::string field){ | |
133 | + double minimum(std::string field){ | |
134 | 134 | size_t idx = fields[field]; //get the field index |
135 | 135 | size_t ncells = cells.size(); //get the total number of cells |
136 | 136 | double minval, val; //stores the current and maximum values | ... | ... |
stim/biomodels/network.h
... | ... | @@ -11,8 +11,8 @@ |
11 | 11 | #include <stim/math/vec3.h> |
12 | 12 | #include <stim/visualization/obj.h> |
13 | 13 | #include <stim/visualization/cylinder.h> |
14 | -#include <ANN/ANN.h> | |
15 | -#include <boost/tuple/tuple.hpp> | |
14 | +#include <stim/structures/kdtree.cuh> | |
15 | +#include <stim/cuda/cudatools/timer.h> | |
16 | 16 | |
17 | 17 | |
18 | 18 | namespace stim{ |
... | ... | @@ -35,7 +35,7 @@ class network{ |
35 | 35 | // default constructor |
36 | 36 | edge() : cylinder<T>() |
37 | 37 | { |
38 | - v[1] = -1; v[0] = -1; | |
38 | + v[1] = (unsigned)(-1); v[0] = (unsigned)(-1); | |
39 | 39 | } |
40 | 40 | /// Constructor - creates an edge from a list of points by calling the stim::fiber constructor |
41 | 41 | |
... | ... | @@ -57,7 +57,7 @@ class network{ |
57 | 57 | /// Output the edge information as a string |
58 | 58 | std::string str(){ |
59 | 59 | std::stringstream ss; |
60 | - ss<<"("<<cylinder<T>::size()<<")\tl = "<<this.length()<<"\t"<<v[0]<<"----"<<v[1]; | |
60 | + ss<<"("<<cylinder<T>::size()<<")\tl = "<<this->length()<<"\t"<<v[0]<<"----"<<v[1]; | |
61 | 61 | return ss.str(); |
62 | 62 | } |
63 | 63 | |
... | ... | @@ -125,7 +125,9 @@ public: |
125 | 125 | return V.size(); |
126 | 126 | } |
127 | 127 | |
128 | - std::vector<vertex> operator*(T s){ | |
128 | + //scale the network by some constant value | |
129 | + // I don't think these work?????? | |
130 | + /*std::vector<vertex> operator*(T s){ | |
129 | 131 | for (unsigned i=0; i< vertices; i ++ ){ |
130 | 132 | V[i] = V[i] * s; |
131 | 133 | } |
... | ... | @@ -139,10 +141,9 @@ public: |
139 | 141 | } |
140 | 142 | } |
141 | 143 | return V; |
142 | - } | |
144 | + }*/ | |
143 | 145 | |
144 | 146 | // Returns an average of branching index in the network |
145 | - | |
146 | 147 | double BranchingIndex(){ |
147 | 148 | double B=0; |
148 | 149 | for(unsigned v=0; v < V.size(); v ++){ |
... | ... | @@ -154,7 +155,6 @@ public: |
154 | 155 | } |
155 | 156 | |
156 | 157 | // Returns number of branch points in the network
157 | - | |
158 | 158 | unsigned int BranchP(){ |
159 | 159 | unsigned int B=0; |
160 | 160 | unsigned int c; |
... | ... | @@ -168,7 +168,6 @@ public: |
168 | 168 | } |
169 | 169 | |
170 | 170 | // Returns number of end points (tips) in the network
171 | - | |
172 | 171 | unsigned int EndP(){ |
173 | 172 | unsigned int B=0; |
174 | 173 | unsigned int c; |
... | ... | @@ -202,10 +201,11 @@ public: |
202 | 201 | // return s; |
203 | 202 | //} |
204 | 203 | |
205 | - | |
204 | + //Calculate Metrics--------------------------------------------------- | |
206 | 205 | // Returns an average of fiber/edge lengths in the network |
207 | 206 | double Lengths(){ |
208 | - stim::vec<T> L;double sumLength = 0; | |
207 | + stim::vec<T> L; | |
208 | + double sumLength = 0; | |
209 | 209 | for(unsigned e = 0; e < E.size(); e++){ //for each edge in the network |
210 | 210 | L.push_back(E[e].length()); //append the edge length |
211 | 211 | sumLength = sumLength + E[e].length(); |
... | ... | @@ -269,8 +269,10 @@ public: |
269 | 269 | double avg = sumFractDim / E.size(); |
270 | 270 | return avg; |
271 | 271 | } |
272 | - stim::cylinder<T> get_cylinder(unsigned f){ | |
273 | - return E[f]; //return the specified edge (casting it to a fiber) | |
272 | + | |
273 | + //returns a cylinder representing a given fiber (based on edge index) | |
274 | + stim::cylinder<T> get_cylinder(unsigned e){ | |
275 | + return E[e]; //return the specified edge (casting it to a fiber) | |
274 | 276 | } |
275 | 277 | |
276 | 278 | //load a network from an OBJ file |
... | ... | @@ -385,11 +387,27 @@ public: |
385 | 387 | return n; |
386 | 388 | } |
387 | 389 | |
390 | + //Copy the point cloud representing the centerline for the network into an array | |
391 | + void centerline_cloud(T* dst) { | |
392 | + size_t p; //stores the current edge point | |
393 | + size_t P; //stores the number of points in an edge | |
394 | + size_t i = 0; //index into the output array of points | |
395 | + for (size_t e = 0; e < E.size(); e++) { //for each edge in the network | |
396 | + P = E[e].size(); //get the number of points in this edge | |
397 | + for (p = 0; p < P; p++) { | |
398 | + dst[i * 3 + 0] = E[e][p][0]; | |
399 | + dst[i * 3 + 1] = E[e][p][1]; | |
400 | + dst[i * 3 + 2] = E[e][p][2]; | |
401 | + i++; | |
402 | + } | |
403 | + } | |
404 | + } | |
405 | + | |
388 | 406 | // gaussian function |
389 | 407 | float gaussianFunction(float x, float std=25){ return exp(-x/(2*std*std));} // by default std = 25 |
390 | 408 | |
391 | - // stim 3d vector to annpoint of 3 dimensions | |
392 | - void stim2ann(ANNpoint &a, stim::vec3<T> b){ | |
409 | + // convert vec3 to array | |
410 | + void stim2array(float *a, stim::vec3<T> b){ | |
393 | 411 | a[0] = b[0]; |
394 | 412 | a[1] = b[1]; |
395 | 413 | a[2] = b[2]; |
... | ... | @@ -413,57 +431,81 @@ public: |
413 | 431 | |
414 | 432 | /// @param A is the network to compare to - the field is generated for A |
415 | 433 | /// @param sigma is the user-defined tolerance value - smaller values provide a stricter comparison |
416 | - stim::network<T> compare(stim::network<T> A, float sigma){ | |
434 | + stim::network<T> compare(stim::network<T> A, float sigma, int device){ | |
417 | 435 | |
418 | - stim::network<T> R; //generate a network storing the result of the comparison | |
419 | - R = (*this); //initialize the result with the current network | |
436 | + stim::network<T> R; //generate a network storing the result of the comparison | |
437 | + R = (*this); //initialize the result with the current network | |
420 | 438 | |
421 | - //generate a KD-tree for network A | |
422 | - float metric = 0.0; // initialize metric to be returned after comparing the networks | |
423 | - ANNkd_tree* kdt; // initialize a pointer to a kd tree | |
424 | - double **c; // centerline (array of double pointers) - points on kdtree must be double | |
425 | - unsigned int n_data = A.total_points(); // set the number of points | |
426 | - c = (double**) malloc(sizeof(double*) * n_data); // allocate the array pointer | |
427 | - for(unsigned int i = 0; i < n_data; i++) // allocate space for each point of 3 dimensions | |
428 | - c[i] = (double*) malloc(sizeof(double) * 3); | |
439 | + T *c; // centerline points of A, stored as a flat array of (x, y, z) triples | |
440 | + size_t n_data = A.total_points(); // set the number of points | |
441 | + c = (T*) malloc(sizeof(T) * n_data * 3); //allocate an array to store all points in the data set | |
429 | 442 | |
430 | 443 | unsigned t = 0; |
431 | - for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network | |
432 | - for(unsigned p = 0; p < A.E[e].size(); p++){ //for each point in the edge | |
444 | + for(unsigned e = 0; e < A.E.size(); e++){ //for each edge in the network | |
445 | + for(unsigned p = 0; p < A.E[e].size(); p++){ //for each point in the edge | |
433 | 446 | for(unsigned d = 0; d < 3; d++){ //for each coordinate |
434 | 447 | |
435 | - c[t][d] = A.E[e][p][d]; | |
448 | + c[t * 3 + d] = A.E[e][p][d]; //copy the point into the array c | |
436 | 449 | } |
437 | 450 | t++; |
438 | 451 | } |
439 | 452 | } |
440 | 453 | |
454 | + //generate a KD-tree for network A | |
455 | + //float metric = 0.0; // initialize metric to be returned after comparing the network | |
456 | + size_t MaxTreeLevels = 3; // max tree level | |
457 | + | |
458 | +#ifdef __CUDACC__ | |
459 | + cudaSetDevice(device); | |
460 | + stim::cuda_kdtree<T, 3> kdt; // declare the GPU kd tree (an object, not a pointer) | |
461 | + | |
441 | 462 | //compare each point in the current network to the field produced by A |
442 | - ANNpointArray pts = (ANNpointArray)c; // create an array of data points of type double | |
443 | - kdt = new ANNkd_tree(pts, n_data, 3); // build a KD tree using the annpointarray | |
444 | - double eps = 0; // error bound | |
445 | - ANNdistArray dists = new ANNdist[1]; // near neighbor distances | |
446 | - ANNidxArray nnIdx = new ANNidx[1]; // near neighbor indices // allocate near neigh indices | |
463 | + kdt.create(c, n_data, MaxTreeLevels); // build a KD tree | |
464 | + T *dists = new T[1]; // near neighbor distances | |
465 | + size_t *nnIdx = new size_t[1]; // allocate the nearest-neighbor index array | |
447 | 466 | |
448 | 467 | stim::vec3<T> p0, p1; |
449 | - float m1; | |
450 | - float M = 0; //stores the total metric value | |
451 | - float L = 0; //stores the total network length | |
452 | - ANNpoint queryPt = annAllocPt(3); | |
468 | + T m1; | |
469 | + //float M = 0; //stores the total metric value | |
470 | + //float L = 0; //stores the total network length | |
471 | + T* queryPt = new T[3]; | |
453 | 472 | for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in R (the copy of the current network) |
454 | 473 | R.E[e].add_mag(0); //add a new magnitude for the metric |
455 | 474 | |
456 | 475 | for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge |
457 | 476 | |
458 | 477 | p1 = R.E[e][p]; //get the next point in the edge |
459 | - stim2ann(queryPt, p1); | |
460 | - kdt->annkSearch( queryPt, 1, nnIdx, dists, eps); //find the distance between A and the current network | |
461 | - m1 = 1.0f - gaussianFunction((float)dists[0], sigma); //calculate the metric value based on the distance | |
478 | + stim2array(queryPt, p1); | |
479 | + kdt.search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network | |
480 | + | |
481 | + m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance | |
462 | 482 | R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment |
463 | 483 | |
464 | 484 | } |
465 | 485 | } |
486 | +#else | |
487 | + stim::cpu_kdtree<T, 3> kdt; | |
488 | + kdt.create(c, n_data, MaxTreeLevels); | |
489 | + T *dists = new T[1]; // near neighbor distances | |
490 | + size_t *nnIdx = new size_t[1]; // allocate the nearest-neighbor index array | |
491 | + | |
492 | + stim::vec3<T> p0, p1; | |
493 | + T m1; | |
494 | + T* queryPt = new T[3]; | |
495 | + for(unsigned e = 0; e < R.E.size(); e++){ //for each edge in R (the copy of the current network) | |
496 | + R.E[e].add_mag(0); //add a new magnitude for the metric | |
497 | + | |
498 | + for(unsigned p = 0; p < R.E[e].size(); p++){ //for each point in the edge | |
466 | 499 | |
500 | + p1 = R.E[e][p]; //get the next point in the edge | |
501 | + stim2array(queryPt, p1); | |
502 | + kdt.cpu_search(queryPt, 1, nnIdx, dists); //find the distance between A and the current network | |
503 | + | |
504 | + m1 = 1.0f - gaussianFunction((T)dists[0], sigma); //calculate the metric value based on the distance | |
505 | + R.E[e].set_mag(m1, p, 1); //set the error for the second point in the segment | |
506 | + } | |
507 | + } | |
508 | +#endif | |
467 | 509 | return R; //return the resulting network |
468 | 510 | } |
469 | 511 | |
... | ... | @@ -487,7 +529,7 @@ public: |
487 | 529 | void load_txt(std::string filename) |
488 | 530 | { |
489 | 531 | std::vector <std::string> file_contents; |
490 | - std::ifstream file(filename); | |
532 | + std::ifstream file(filename.c_str()); | |
491 | 533 | std::string line; |
492 | 534 | std::vector<unsigned> id2vert; //this list stores the vertex ID associated with each network vertex |
493 | 535 | //for each line in the text file, store them as strings in file_contents |
... | ... | @@ -538,7 +580,7 @@ public: |
538 | 580 | for(unsigned int d = 0; d < 3; d++){ |
539 | 581 | ss<<p[i][d]; |
540 | 582 | } |
541 | - ss < "\n"; | |
583 | + ss << "\n"; | |
542 | 584 | } |
543 | 585 | return ss.str(); |
544 | 586 | } |
... | ... | @@ -552,8 +594,8 @@ public: |
552 | 594 | void |
553 | 595 | to_txt(std::string filename) |
554 | 596 | { |
555 | - std::ofstream ofs(filename, std::ofstream::out | std::ofstream::app); | |
556 | - int num; | |
597 | + std::ofstream ofs(filename.c_str(), std::ofstream::out | std::ofstream::app); | |
598 | + //int num; | |
557 | 599 | ofs << (E.size()).str() << "\n"; |
558 | 600 | for(unsigned int i = 0; i < E.size(); i++) |
559 | 601 | { |
... | ... | @@ -566,7 +608,8 @@ public: |
566 | 608 | { |
567 | 609 | std::string str; |
568 | 610 | str = V[i].str(); |
569 | - removeCharsFromString(str, "[],"); | |
611 | + char temp[4] = "[],"; | |
612 | + removeCharsFromString(str, temp); | |
570 | 613 | ofs << str << "\n"; |
571 | 614 | } |
572 | 615 | ofs.close(); | ... | ... |
stim/biomodels/network_dep.h
stim/cuda/cudatools/error.h
1 | +#ifndef STIM_CUDA_ERROR_H | |
2 | +#define STIM_CUDA_ERROR_H | |
3 | + | |
1 | 4 | #include <stdio.h> |
2 | 5 | #include <iostream> |
3 | 6 | using namespace std; |
4 | 7 | #include "cuda_runtime.h" |
5 | 8 | #include "device_launch_parameters.h" |
6 | 9 | #include "cufft.h" |
7 | - | |
8 | -#ifndef CUDA_HANDLE_ERROR_H | |
9 | -#define CUDA_HANDLE_ERROR_H | |
10 | +#include "cublas_v2.h" | |
10 | 11 | |
11 | 12 | //handle error macro |
12 | -static void HandleError( cudaError_t err, const char *file, int line ) { | |
13 | +static void cuHandleError( cudaError_t err, const char *file, int line ) { | |
13 | 14 | if (err != cudaSuccess) { |
14 | - //FILE* outfile = fopen("cudaErrorLog.txt", "w"); | |
15 | - //fprintf(outfile, "%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); | |
16 | - //fclose(outfile); | |
17 | 15 | printf("%s in %s at line %d\n", cudaGetErrorString( err ), file, line ); |
18 | - //exit( EXIT_FAILURE ); | |
19 | 16 | |
20 | 17 | } |
21 | 18 | } |
22 | -#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) | |
19 | +#define HANDLE_ERROR( err ) (cuHandleError( err, __FILE__, __LINE__ )) | |
23 | 20 | |
24 | -static void CufftError( cufftResult err ) | |
21 | +static void cufftHandleError( cufftResult err, const char*file, int line ) | |
25 | 22 | { |
26 | 23 | if (err != CUFFT_SUCCESS) |
27 | 24 | { |
... | ... | @@ -42,7 +39,29 @@ static void CufftError( cufftResult err ) |
42 | 39 | |
43 | 40 | } |
44 | 41 | } |
42 | +#define CUFFT_HANDLE_ERROR( err ) (cufftHandleError( err, __FILE__, __LINE__ )) | |
45 | 43 | |
44 | +static void cublasHandleError( cublasStatus_t err, const char*file, int line ){ | |
45 | + if(err != CUBLAS_STATUS_SUCCESS){ | |
46 | + if(err == CUBLAS_STATUS_NOT_INITIALIZED) | |
47 | + std::cout<<"CUBLAS_STATUS_NOT_INITIALIZED" <<" in file "<<file<<" line "<<std::endl; | |
48 | + else if(err == CUBLAS_STATUS_ALLOC_FAILED) | |
49 | + std::cout<<"CUBLAS_STATUS_ALLOC_FAILED" <<" in file "<<file<<" line "<<std::endl; | |
50 | + else if(err == CUBLAS_STATUS_INVALID_VALUE) | |
51 | + std::cout<<"CUBLAS_STATUS_INVALID_VALUE" <<" in file "<<file<<" line "<<std::endl; | |
52 | + else if(err == CUBLAS_STATUS_ARCH_MISMATCH) | |
53 | + std::cout<<"CUBLAS_STATUS_ARCH_MISMATCH" <<" in file "<<file<<" line "<<std::endl; | |
54 | + else if(err == CUBLAS_STATUS_MAPPING_ERROR) | |
55 | + std::cout<<"CUBLAS_STATUS_MAPPING_ERROR" <<" in file "<<file<<" line "<<std::endl; | |
56 | + else if(err == CUBLAS_STATUS_EXECUTION_FAILED) | |
57 | + std::cout<<"CUBLAS_STATUS_EXECUTION_FAILED" <<" in file "<<file<<" line "<<std::endl; | |
58 | + else if(err == CUBLAS_STATUS_INTERNAL_ERROR) | |
59 | + std::cout<<"CUBLAS_STATUS_INTERNAL_ERROR" <<" in file "<<file<<" line "<<std::endl; | |
60 | + else | |
61 | + std::cout<<"Unknown error"<<" in file "<<file<<" line "<<std::endl; | |
62 | + } | |
63 | +} | |
64 | +#define CUBLAS_HANDLE_ERROR( err ) (cublasHandleError( err, __FILE__, __LINE__ )) | |
46 | 65 | |
47 | 66 | |
48 | 67 | #endif | ... | ... |
stim/envi/agilent_binary.h
... | ... | @@ -4,13 +4,15 @@ |
4 | 4 | |
5 | 5 | #include <string> |
6 | 6 | #include <fstream> |
7 | +#include <complex> | |
7 | 8 | |
8 | 9 | //CUDA |
9 | -#ifdef CUDA_FOUND | |
10 | - #include <cuda_runtime.h> | |
11 | - #include "cufft.h" | |
12 | - #include <stim/cuda/cudatools/error.h> | |
13 | -#endif | |
10 | +//#ifdef CUDA_FOUND | |
11 | +#include <cuda_runtime.h> | |
12 | +#include "cufft.h" | |
13 | +#include <stim/cuda/cudatools/error.h> | |
14 | +#include <stim/envi/envi_header.h> | |
15 | +//#endif | |
14 | 16 | |
15 | 17 | namespace stim{ |
16 | 18 | |
... | ... | @@ -19,10 +21,10 @@ class agilent_binary{ |
19 | 21 | |
20 | 22 | protected: |
21 | 23 | std::string fname; |
22 | - T* ptr; | |
23 | - size_t R[3]; | |
24 | - static const size_t header = 1020; | |
25 | - double Z[2]; | |
24 | + T* ptr; //pointer to the image data | |
25 | + size_t R[3]; //size of the binary image in X, Y, and Z | |
26 | + static const size_t header = 1020; //header size | |
27 | + double Z[2]; //range of z values (position or wavelength) | |
26 | 28 | |
27 | 29 | public: |
28 | 30 | size_t size(){ |
... | ... | @@ -42,6 +44,10 @@ public: |
42 | 44 | alloc(); |
43 | 45 | } |
44 | 46 | |
47 | + size_t dim(size_t i){ | |
48 | + return R[i]; | |
49 | + } | |
50 | + | |
45 | 51 | /// Create a deep copy of an agilent_binary object |
46 | 52 | void deep_copy(agilent_binary<T>* dst, const agilent_binary<T>* src){ |
47 | 53 | dst->alloc(src->R[0], src->R[1], src->R[2]); //allocate memory |
... | ... | @@ -136,6 +142,42 @@ public: |
136 | 142 | return header; |
137 | 143 | } |
138 | 144 | |
145 | + /// Subtract the mean from each pixel. Generally used for centering an interferogram. | |
146 | + void meancenter(){ | |
147 | + size_t Z = R[2]; //store the number of bands | |
148 | + size_t XY = R[0] * R[1]; //store the number of pixels in the image | |
149 | + T sum = (T)0; | |
150 | + T mean; | |
151 | + for(size_t xy = 0; xy < XY; xy++){ //for each pixel | |
152 | + sum = 0; | |
153 | + for(size_t z = 0; z < Z; z++){ //for each band | |
154 | + sum += ptr[ z * XY + xy ]; //add the band value to a running sum | |
155 | + } | |
156 | + mean = sum / (T)Z; //calculate the pixel mean | |
157 | + for(size_t z = 0; z < Z; z++){ | |
158 | + ptr[ z * XY + xy ] -= mean; //subtract the mean from each band | |
159 | + } | |
160 | + } | |
161 | + } | |
162 | + | |
163 | + /// adds n bands of zero padding to the end of the file | |
164 | + void zeropad(size_t n){ | |
165 | + size_t newZ = R[2] + n; | |
166 | + T* temp = (T*) calloc(R[0] * R[1] * newZ, sizeof(T)); //allocate space for the new image | |
167 | + memcpy(temp, ptr, size() * sizeof(T)); //copy the old data to the new image | |
168 | + | |
169 | + free(ptr); //free the old data | |
170 | + ptr = temp; //swap in the new data | |
171 | + R[2] = newZ; //set the z-dimension to the new zero-padded size | |
172 | + } | |
173 | + | |
174 | + //pads the z-dimension up to the next power-of-two | |
175 | + void zeropad(){ | |
176 | + size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2))); //round the band count up to a power-of-two | |
177 | + size_t n = newZ - R[2]; //calculate the number of bands to add | |
178 | + zeropad(n); //add the padding | |
179 | + } | |
180 | + | |
139 | 181 | /// Calculate the absorbance spectrum from the transmission spectrum given a background |
140 | 182 | void absorbance(stim::agilent_binary<T>* background){ |
141 | 183 | size_t N = size(); //calculate the number of values to be ratioed |
... | ... | @@ -147,7 +189,7 @@ public: |
147 | 189 | ptr[i] = -log10(ptr[i] / background->ptr[i]); |
148 | 190 | } |
149 | 191 | |
150 | -#ifdef CUDA_FOUND | |
192 | +//#ifdef CUDA_FOUND | |
151 | 193 | /// Perform an FFT and return a binary file with bands in the specified range |
152 | 194 | agilent_binary<T> fft(double band_min, double band_max, double ELWN = 15798, int UDR = 2){ |
153 | 195 | auto total_start = std::chrono::high_resolution_clock::now(); |
... | ... | @@ -234,7 +276,22 @@ public: |
234 | 276 | |
235 | 277 | return result; |
236 | 278 | } |
237 | -#endif | |
279 | + | |
280 | + //copies the image into bip_ptr with the band index varying fastest (BIP interleave); always returns 0 | |
281 | + int bip(T* bip_ptr){ | |
282 | + //std::ofstream out(outfile.c_str(), std::ios::binary); //create a binary file stream for output | |
283 | + size_t XY = R[0] * R[1]; | |
284 | + size_t B = R[2]; | |
285 | + size_t b; | |
286 | + | |
287 | + for(size_t xy = 0; xy < XY; xy++){ | |
288 | + for(b = 0; b < B; b++){ | |
289 | + bip_ptr[xy * B + b] = ptr[b * XY + xy]; | |
290 | + } | |
291 | + } | |
292 | + return 0; | |
293 | + } | |
294 | +//#endif | |
238 | 295 | |
239 | 296 | }; |
240 | 297 | ... | ... |
stim/envi/bil.h
... | ... | @@ -4,6 +4,7 @@ |
4 | 4 | #include "../envi/envi_header.h" |
5 | 5 | #include "../envi/hsi.h" |
6 | 6 | #include "../math/fd_coefficients.h" |
7 | +#include <stim/cuda/cudatools/error.h> | |
7 | 8 | #include <cstring> |
8 | 9 | #include <utility> |
9 | 10 | #include <deque> |
... | ... | @@ -118,7 +119,7 @@ public: |
118 | 119 | page++; |
119 | 120 | //if wavelength is larger than the last wavelength in header file |
120 | 121 | if (page == Z()) { |
121 | - band_index(p, Z()-1); | |
122 | + band_index(p, Z()-1, PROGRESS); | |
122 | 123 | return true; |
123 | 124 | } |
124 | 125 | } |
... | ... | @@ -224,10 +225,44 @@ public: |
224 | 225 | } |
225 | 226 | |
226 | 227 | //given a Y ,return a XZ slice |
227 | - bool read_plane_y(T * p, unsigned long long y){ | |
228 | + bool read_plane_xz(T * p, size_t y){ | |
228 | 229 | return binary<T>::read_plane_2(p, y); |
229 | 230 | } |
230 | 231 | |
232 | + //given a Y, return ZX slice (transposed such that the spectrum is the leading dimension) | |
233 | + int read_plane_zx(T* p, size_t y){ | |
234 | + T* temp = (T*) malloc(X() * Z() * sizeof(T)); //allocate space to store the temporary xz plane | |
235 | + binary<T>::read_plane_2(temp, y); //load the plane from disk | |
236 | + size_t z, x; | |
237 | + for(z = 0; z < Z(); z++){ | |
238 | + for(x = 0; x <= z; x++){ | |
239 | + p[x * Z() + z] = temp[z * X() + x]; //copy to the destination frame | |
240 | + } | |
241 | + } | |
242 | + } | |
243 | + | |
244 | + //load a frame y into a pre-allocated double-precision array | |
245 | + int read_plane_xzd(double* f, size_t y){ | |
246 | + size_t XB = X() * Z(); | |
247 | + T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision | |
248 | + if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1 | |
249 | + for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double | |
250 | + return 0; | |
251 | + } | |
252 | + | |
253 | + //given a Y, return ZX slice (transposed such that the spectrum is the leading dimension) | |
254 | + int read_plane_zxd(double* p, size_t y){ | |
255 | + T* temp = (T*) malloc(X() * Z() * sizeof(T)); //allocate space to store the temporary xz plane | |
256 | + binary<T>::read_plane_2(temp, y); //load the plane from disk | |
257 | + size_t z, x; | |
258 | + for(z = 0; z < Z(); z++){ | |
259 | + for(x = 0; x < X(); x++){ | |
260 | + p[x * Z() + z] = (double)temp[z * X() + x]; //copy to the destination frame | |
261 | + } | |
262 | + } | |
263 | + return 0; | |
264 | + } | |
265 | + | |
231 | 266 | |
232 | 267 | /// Perform baseline correction given a list of baseline points and stores the result in a new BSQ file. |
233 | 268 | |
... | ... | @@ -268,7 +303,7 @@ public: |
268 | 303 | for (unsigned long long k =0; k < Y(); k++) |
269 | 304 | { |
270 | 305 | //get the current y slice |
271 | - read_plane_y(c, k); | |
306 | + read_plane_xz(c, k); | |
272 | 307 | |
273 | 308 | //initialize lownum, highnum, low, high |
274 | 309 | ai = w[0]; |
... | ... | @@ -369,7 +404,7 @@ public: |
369 | 404 | |
370 | 405 | for(unsigned long long j = 0; j < Y(); j++) |
371 | 406 | { |
372 | - read_plane_y(c, j); | |
407 | + read_plane_xz(c, j); | |
373 | 408 | for(unsigned long long i = 0; i < B; i++) |
374 | 409 | { |
375 | 410 | for(unsigned long long m = 0; m < X(); m++) |
... | ... | @@ -469,7 +504,7 @@ public: |
469 | 504 | |
470 | 505 | for ( unsigned long long i = 0; i < Y(); i++) |
471 | 506 | { |
472 | - read_plane_y(p, i); | |
507 | + read_plane_xz(p, i); | |
473 | 508 | for ( unsigned long long k = 0; k < Z(); k++) |
474 | 509 | { |
475 | 510 | unsigned long long ks = k * X(); |
... | ... | @@ -863,7 +898,7 @@ public: |
863 | 898 | |
864 | 899 | for (unsigned long long i = 0; i < Y(); i++) //for each value in Y() (BIP should be X) |
865 | 900 | { |
866 | - read_plane_y(temp, i); //retrieve an ZX slice, stored in temp | |
901 | + read_plane_xz(temp, i); //retrieve an ZX slice, stored in temp | |
867 | 902 | for ( unsigned long long j = 0; j < Z(); j++) //for each Z() (Y) |
868 | 903 | { |
869 | 904 | for (unsigned long long k = 0; k < X(); k++) //for each band |
... | ... | @@ -933,7 +968,7 @@ public: |
933 | 968 | //for each slice along the y axis |
934 | 969 | for (unsigned long long y = 0; y < Y(); y++) //Select a page by choosing Y coordinate, Y() |
935 | 970 | { |
936 | - read_plane_y(slice, y); //retrieve an ZX page, store in "slice" | |
971 | + read_plane_xz(slice, y); //retrieve an XZ page, store in "slice" | |
937 | 972 | |
938 | 973 | //for each sample along X |
939 | 974 | for (unsigned long long x = 0; x < X(); x++) //Select a pixel by choosing X coordinate in the page, X() |
... | ... | @@ -992,43 +1027,136 @@ public: |
992 | 1027 | |
993 | 1028 | /// @param m is a pointer to pre-allocated memory of size [B * sizeof(double)] that stores the mean spectrum |
994 | 1029 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
995 | - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){ | |
1030 | + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){ | |
996 | 1031 | unsigned long long XZ = X() * Z(); |
997 | 1032 | unsigned long long XY = X() * Y(); |
998 | 1033 | T* temp = (T*)malloc(sizeof(T) * XZ); |
999 | - for (unsigned long long j = 0; j < Z(); j++){ | |
1000 | - p[j] = 0; | |
1001 | - } | |
1034 | + memset(m, 0, Z() * sizeof(double)); //initialize the mean to zero | |
1035 | + double* e_x2 = (double*)malloc(Z() * sizeof(double)); //allocate space for E[x^2] | |
1036 | + memset(e_x2, 0, Z() * sizeof(double)); //initialize E[x^2] to zero | |
1002 | 1037 | //calculate the number of valid pixels in a band |
1003 | - unsigned long long count = 0; | |
1004 | - for (unsigned long long j = 0; j < XY; j++){ | |
1005 | - if (mask == NULL || mask[j] != 0){ | |
1006 | - count++; | |
1007 | - } | |
1008 | - } | |
1038 | + size_t count = nnz(mask); //count the number of pixels in the mask | |
1039 | + | |
1040 | + double x; //create a register to store the pixel value | |
1009 | 1041 | for (unsigned long long k = 0; k < Y(); k++){ |
1010 | - read_plane_y(temp, k); | |
1042 | + read_plane_xz(temp, k); | |
1011 | 1043 | unsigned long long kx = k * X(); |
1012 | 1044 | for (unsigned long long i = 0; i < X(); i++){ |
1013 | 1045 | if (mask == NULL || mask[kx + i] != 0){ |
1014 | 1046 | for (unsigned long long j = 0; j < Z(); j++){ |
1015 | - p[j] += temp[j * X() + i] / (double)count; | |
1047 | + x = temp[j * X() + i]; | |
1048 | + m[j] += x / (double)count; | |
1049 | + e_x2[j] += x*x / (double)count; | |
1016 | 1050 | } |
1017 | 1051 | } |
1018 | 1052 | } |
1019 | 1053 | if(PROGRESS) progress = (double)(k+1) / Y() * 100; |
1020 | 1054 | } |
1055 | + | |
1056 | + for(size_t i = 0; i < Z(); i++) //calculate the standard deviation | |
1057 | + std[i] = sqrt(e_x2[i] - m[i] * m[i]); | |
1058 | + | |
1021 | 1059 | free(temp); |
1022 | 1060 | return true; |
1023 | 1061 | } |
1024 | 1062 | |
1063 | + int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1064 | + cublasStatus_t stat; | |
1065 | + cublasHandle_t handle; | |
1066 | + | |
1067 | + progress = 0; //initialize the progress to zero (0) | |
1068 | + size_t XY = X() * Y(); //calculate the number of elements in a band image | |
1069 | + size_t XB = X() * Z(); | |
1070 | + size_t B = Z(); //calculate the number of spectral elements | |
1071 | + | |
1072 | + double* F = (double*)malloc(sizeof(double) * B * X()); //allocate space for the frame that will be pulled from the file | |
1073 | + double* F_dev; | |
1074 | + HANDLE_ERROR(cudaMalloc(&F_dev, X() * B * sizeof(double))); //allocate space for the frame on the GPU | |
1075 | + double* s_dev; //declare a device pointer that will store the spectrum on the GPU | |
1076 | + double* A_dev; //declare a device pointer that will store the covariance matrix on the GPU | |
1077 | + double* avg_dev; //declare a device pointer that will store the average spectrum | |
1078 | + HANDLE_ERROR(cudaMalloc(&s_dev, B * sizeof(double))); //allocate space on the CUDA device for a spectrum | |
1079 | + HANDLE_ERROR(cudaMalloc(&A_dev, B * B * sizeof(double))); //allocate space on the CUDA device for the covariance matrix | |
1080 | + HANDLE_ERROR(cudaMemset(A_dev, 0, B * B * sizeof(double))); //initialize the covariance matrix to zero (0) | |
1081 | + HANDLE_ERROR(cudaMalloc(&avg_dev, XB * sizeof(double))); //allocate space on the CUDA device for the average spectrum | |
1082 | + for(size_t x = 0; x < X(); x++) //make multiple copies of the average spectrum in order to build a matrix | |
1083 | + HANDLE_ERROR(cudaMemcpy(&avg_dev[x * B], avg, B * sizeof(double), cudaMemcpyHostToDevice)); | |
1084 | + //stat = cublasSetVector((int)B, sizeof(double), avg, 1, avg_dev, 1); //copy the average spectrum to the CUDA device | |
1085 | + | |
1086 | + double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product) | |
1087 | + double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) | |
1088 | + | |
1089 | + CUBLAS_HANDLE_ERROR(stat = cublasCreate(&handle)); //create a cuBLAS instance | |
1090 | + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid | |
1091 | + | |
1092 | + else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl; | |
1093 | + double beta = 1.0; | |
1094 | + size_t x, y; | |
1095 | + for(y = 0; y < Y(); y++){ //for each line | |
1096 | + read_plane_zxd(F, y); //read a frame from the file | |
1097 | + HANDLE_ERROR(cudaMemcpy(F_dev, F, XB * sizeof(double), cudaMemcpyHostToDevice)); //copy the frame to the GPU | |
1098 | + CUBLAS_HANDLE_ERROR(cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, (int)B, (int)X(), &axpy_alpha, avg_dev, (int)B, &beta, F_dev, (int)B, F_dev, (int)B));//subtract the mean spectrum | |
1099 | + | |
1100 | + for(x = 0; x < X(); x++) | |
1101 | + CUBLAS_HANDLE_ERROR(cublasDsyr(handle, CUBLAS_FILL_MODE_UPPER, (int)B, &ger_alpha, &F_dev[x*B], 1, A_dev, (int)B)); //perform an outer product | |
1102 | + if(PROGRESS) progress = (double)(y + 1) / Y() * 100; | |
1103 | + } | |
1104 | + | |
1105 | + cublasGetMatrix((int)B, (int)B, sizeof(double), A_dev, (int)B, co, (int)B); //copy the result from the GPU to the CPU | |
1106 | + | |
1107 | + cudaFree(A_dev); //clean up allocated device memory | |
1108 | + cudaFree(s_dev); | |
1109 | + cudaFree(avg_dev); | |
1110 | + | |
1111 | + for(unsigned long long i = 0; i < B; i++){ //copy the upper triangular portion to the lower triangular portion | |
1112 | + for(unsigned long long j = i+1; j < B; j++){ | |
1113 | + co[B * i + j] = co[B * j + i]; | |
1114 | + } | |
1115 | + } | |
1116 | + | |
1117 | + return 0; | |
1118 | + | |
1119 | + | |
1120 | + | |
1121 | + } | |
1122 | + | |
1123 | + | |
1025 | 1124 | /// Calculate the covariance matrix for all masked pixels in the image. |
1026 | 1125 | |
1027 | 1126 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1028 | 1127 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1029 | 1128 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1030 | - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1129 | + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ | |
1031 | 1130 | progress = 0; |
1131 | + | |
1132 | + if(use_gpu){ | |
1133 | + int dev_count; | |
1134 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1135 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1136 | + cudaDeviceProp prop; | |
1137 | + int best_device_id = 0; //stores the best CUDA device | |
1138 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1139 | + std::cout<<"CUDA devices:"<<std::endl; | |
1140 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1141 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1142 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1143 | + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1144 | + if(cc > best_device_cc){ | |
1145 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1146 | + best_device_id = d; | |
1147 | + } | |
1148 | + } | |
1149 | + | |
1150 | + if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator | |
1151 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1152 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1153 | + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1154 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1155 | + } //otherwise continue using the CPU | |
1156 | + | |
1157 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1158 | + } | |
1159 | + | |
1032 | 1160 | //memory allocation |
1033 | 1161 | unsigned long long xy = X() * Y(); |
1034 | 1162 | unsigned long long B = Z(); |
... | ... | @@ -1325,7 +1453,7 @@ public: |
1325 | 1453 | c = (T*)malloc( L ); //allocate space for the slice |
1326 | 1454 | |
1327 | 1455 | for(unsigned long long j = 0; j < Y(); j++){ //for each line |
1328 | - read_plane_y(c, j); //load the line into memory | |
1456 | + read_plane_xz(c, j); //load the line into memory | |
1329 | 1457 | for(unsigned long long i = 0; i < B; i++){ //for each band |
1330 | 1458 | for(unsigned long long m = 0; m < X(); m++){ //for each sample |
1331 | 1459 | if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked |
... | ... | @@ -1355,7 +1483,7 @@ public: |
1355 | 1483 | c = (T*)malloc( L ); //allocate space for the slice |
1356 | 1484 | |
1357 | 1485 | for(unsigned long long j = 0; j < Y(); j++){ //for each line |
1358 | - read_plane_y(c, j); //load the line into memory | |
1486 | + read_plane_xz(c, j); //load the line into memory | |
1359 | 1487 | for(unsigned long long i = 0; i < B; i++){ //for each band |
1360 | 1488 | for(unsigned long long m = 0; m < X(); m++){ //for each sample |
1361 | 1489 | if( mask == NULL && mask[m + j * X()] ) //if the pixel is masked | ... | ... |
stim/envi/bip.h
... | ... | @@ -5,13 +5,16 @@ |
5 | 5 | #include "../envi/bil.h" |
6 | 6 | #include "../envi/hsi.h" |
7 | 7 | #include <cstring> |
8 | +#include <complex> | |
8 | 9 | #include <utility> |
9 | 10 | |
10 | 11 | //CUDA |
11 | -#ifdef CUDA_FOUND | |
12 | - #include <cuda_runtime.h> | |
13 | - #include "cublas_v2.h" | |
14 | -#endif | |
12 | +//#ifdef CUDA_FOUND | |
13 | +#include <stim/cuda/cudatools/error.h> | |
14 | +#include <cuda_runtime.h> | |
15 | +#include "cublas_v2.h" | |
16 | +#include "cufft.h" | |
17 | +//#endif | |
15 | 18 | |
16 | 19 | namespace stim{ |
17 | 20 | |
... | ... | @@ -257,7 +260,7 @@ public: |
257 | 260 | } |
258 | 261 | |
259 | 262 | //given a Y ,return a ZX slice |
260 | - bool read_plane_y(T * p, unsigned long long y){ | |
263 | + bool read_plane_y(T * p, size_t y){ | |
261 | 264 | return binary<T>::read_plane_2(p, y); |
262 | 265 | } |
263 | 266 | |
... | ... | @@ -954,39 +957,43 @@ public: |
954 | 957 | |
955 | 958 | /// @param m is a pointer to pre-allocated memory of size [B * sizeof(double)] that stores the mean spectrum |
956 | 959 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
957 | - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){ | |
960 | + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){ | |
958 | 961 | unsigned long long XY = X() * Y(); //calculate the total number of pixels in the HSI |
959 | 962 | T* temp = (T*)malloc(sizeof(T) * Z()); //allocate space for the current spectrum to be read |
960 | - memset(p, 0, sizeof(double) * Z()); //initialize the average spectrum to zero (0) | |
961 | - //for (unsigned j = 0; j < Z(); j++){ | |
962 | - // p[j] = 0; | |
963 | - //} | |
963 | + memset(m, 0, Z() * sizeof(double)); //set the mean spectrum to zero | |
964 | + double* e_x2 = (double*)malloc(Z() * sizeof(double)); //allocate space for E[x^2] | |
965 | + memset(e_x2, 0, Z() * sizeof(double)); //set all values for E[x^2] to zero | |
964 | 966 | |
965 | 967 | unsigned long long count = nnz(mask); //calculate the number of masked pixels |
966 | - | |
968 | + double x; | |
967 | 969 | for (unsigned long long i = 0; i < XY; i++){ //for each pixel in the HSI |
968 | 970 | if (mask == NULL || mask[i] != 0){ //if the pixel is masked |
969 | 971 | pixel(temp, i); //get the spectrum |
970 | 972 | for (unsigned long long j = 0; j < Z(); j++){ //for each spectral component |
971 | - p[j] += (double)temp[j] / (double)count; //add the weighted value to the average | |
973 | + x = temp[j]; | |
974 | + m[j] += x / (double)count; //add the weighted value to the average | |
975 | + e_x2[j] += x*x / (double)count; | |
972 | 976 | } |
973 | 977 | } |
974 | 978 | if(PROGRESS) progress = (double)(i+1) / XY * 100; //increment the progress |
975 | 979 | } |
976 | 980 | |
981 | + //calculate the standard deviation | |
982 | + for(size_t i = 0; i < Z(); i++) | |
983 | + std[i] = sqrt(e_x2[i] - m[i] * m[i]); | |
984 | + | |
977 | 985 | free(temp); |
978 | 986 | return true; |
979 | 987 | } |
980 | -#ifdef CUDA_FOUND | |
988 | +//#ifdef CUDA_FOUND | |
981 | 989 | /// Calculate the covariance matrix for masked pixels using cuBLAS |
982 | 990 | /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra |
983 | - bool co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
991 | + int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
984 | 992 | |
985 | 993 | cudaError_t cudaStat; |
986 | 994 | cublasStatus_t stat; |
987 | 995 | cublasHandle_t handle; |
988 | 996 | |
989 | - progress = 0; //initialize the progress to zero (0) | |
990 | 997 | unsigned long long XY = X() * Y(); //calculate the number of elements in a band image |
991 | 998 | unsigned long long B = Z(); //calculate the number of spectral elements |
992 | 999 | |
... | ... | @@ -1004,10 +1011,9 @@ public: |
1004 | 1011 | double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) |
1005 | 1012 | |
1006 | 1013 | stat = cublasCreate(&handle); //create a cuBLAS instance |
1007 | - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid | |
1008 | - printf ("CUBLAS initialization failed\n"); | |
1009 | - return EXIT_FAILURE; | |
1010 | - } | |
1014 | + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid | |
1015 | + | |
1016 | + else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl; | |
1011 | 1017 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel |
1012 | 1018 | if (mask == NULL || mask[xy] != 0){ |
1013 | 1019 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location |
... | ... | @@ -1031,26 +1037,45 @@ public: |
1031 | 1037 | } |
1032 | 1038 | } |
1033 | 1039 | |
1034 | - return true; | |
1040 | + return 0; | |
1035 | 1041 | } |
1036 | -#endif | |
1042 | +//#endif | |
1037 | 1043 | |
1038 | 1044 | /// Calculate the covariance matrix for all masked pixels in the image with 64-bit floating point precision. |
1039 | 1045 | |
1040 | 1046 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1041 | 1047 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1042 | 1048 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1043 | - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1044 | - | |
1045 | -#ifdef CUDA_FOUND | |
1046 | - int dev_count; | |
1047 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | |
1048 | - cudaDeviceProp prop; | |
1049 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | |
1050 | - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator | |
1051 | - return co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1052 | -#endif | |
1049 | + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ | |
1053 | 1050 | progress = 0; |
1051 | + | |
1052 | + if(use_gpu){ | |
1053 | + int dev_count; | |
1054 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1055 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1056 | + cudaDeviceProp prop; | |
1057 | + int best_device_id = 0; //stores the best CUDA device | |
1058 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1059 | + std::cout<<"CUDA devices----"<<std::endl; | |
1060 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1061 | + cudaGetDeviceProperties(&prop, d); //get the property of the first device | |
1062 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1063 | + std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information | |
1064 | + if(cc > best_device_cc){ | |
1065 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1066 | + best_device_id = d; | |
1067 | + } | |
1068 | + } | |
1069 | + | |
1070 | + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | |
1071 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1072 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1073 | + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1074 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1075 | + } //otherwise continue using the CPU | |
1076 | + | |
1077 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1078 | + } | |
1054 | 1079 | //memory allocation |
1055 | 1080 | unsigned long long XY = X() * Y(); |
1056 | 1081 | unsigned long long B = Z(); |
... | ... | @@ -1092,10 +1117,10 @@ public: |
1092 | 1117 | } |
1093 | 1118 | |
1094 | 1119 | |
1095 | -#ifdef CUDA_FOUND | |
1120 | +//#ifdef CUDA_FOUND | |
1096 | 1121 | /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS |
1097 | 1122 | /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra |
1098 | - bool coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1123 | + int coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1099 | 1124 | |
1100 | 1125 | cudaError_t cudaStat; |
1101 | 1126 | cublasStatus_t stat; |
... | ... | @@ -1123,11 +1148,9 @@ public: |
1123 | 1148 | double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product) |
1124 | 1149 | double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) |
1125 | 1150 | |
1126 | - stat = cublasCreate(&handle); //create a cuBLAS instance | |
1127 | - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid | |
1128 | - printf ("CUBLAS initialization failed\n"); | |
1129 | - return EXIT_FAILURE; | |
1130 | - } | |
1151 | + CUBLAS_HANDLE_ERROR(cublasCreate(&handle)); //create a cuBLAS instance | |
1152 | + if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid | |
1153 | + | |
1131 | 1154 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel |
1132 | 1155 | if (mask == NULL || mask[xy] != 0){ |
1133 | 1156 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location |
... | ... | @@ -1158,27 +1181,44 @@ public: |
1158 | 1181 | } |
1159 | 1182 | } |
1160 | 1183 | |
1161 | - return true; | |
1184 | + return 0; | |
1162 | 1185 | } |
1163 | -#endif | |
1186 | +//#endif | |
1164 | 1187 | |
1165 | 1188 | /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision. |
1166 | 1189 | |
1167 | 1190 | /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1168 | 1191 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1169 | 1192 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1170 | - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1171 | - | |
1172 | -#ifdef CUDA_FOUND | |
1173 | - int dev_count; | |
1174 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | |
1175 | - cudaDeviceProp prop; | |
1176 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | |
1177 | - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator | |
1178 | - return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1179 | -#endif | |
1180 | - | |
1181 | - | |
1193 | + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ | |
1194 | + | |
1195 | + if(use_gpu){ | |
1196 | + int dev_count; | |
1197 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1198 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1199 | + cudaDeviceProp prop; | |
1200 | + int best_device_id = 0; //stores the best CUDA device | |
1201 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1202 | + std::cout<<"CUDA devices:"<<std::endl; | |
1203 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1204 | + cudaGetDeviceProperties(&prop, d); //get the property of the first device | |
1205 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1206 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1207 | + if(cc > best_device_cc){ | |
1208 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1209 | + best_device_id = d; | |
1210 | + } | |
1211 | + } | |
1212 | + | |
1213 | + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | |
1214 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1215 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1216 | + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1217 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1218 | + } //otherwise continue using the CPU | |
1219 | + | |
1220 | + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | |
1221 | + } | |
1182 | 1222 | |
1183 | 1223 | progress = 0; |
1184 | 1224 | //memory allocation |
... | ... | @@ -1443,7 +1483,7 @@ public: |
1443 | 1483 | unsigned long long jump_sample = ( (Z() - b1) + b0 ) * sizeof(T); |
1444 | 1484 | |
1445 | 1485 | //distance between sample spectra in adjacent lines |
1446 | - unsigned long long jump_line = (X() - x1) * Z() * sizeof(T); | |
1486 | + unsigned long long jump_line = ( X() - x1 + x0 ) * Z() * sizeof(T); | |
1447 | 1487 | |
1448 | 1488 | |
1449 | 1489 | //unsigned long long sp = y0 * X() + x0; //start pixel |
... | ... | @@ -1682,7 +1722,117 @@ public: |
1682 | 1722 | return true; |
1683 | 1723 | } |
1684 | 1724 | |
1725 | + int fft(std::string outname, size_t bandmin, size_t bandmax, size_t samples = 0, T* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int device = 0){ | |
1726 | + if(device == -1){ | |
1727 | + std::cout<<"ERROR: GPU required for FFT (uses cuFFT)."<<std::endl; | |
1728 | + exit(1); | |
1729 | + } | |
1730 | + if(samples == 0) samples = Z(); //if samples are specified, use all of them | |
1731 | + if(samples > Z()){ | |
1732 | + std::cout<<"ERROR: stim::envi doesn't support FFT padding just yet."<<std::endl; | |
1733 | + exit(1); | |
1734 | + } | |
1735 | + int nd; //stores the number of CUDA devices | |
1736 | + HANDLE_ERROR(cudaGetDeviceCount(&nd)); //get the number of CUDA devices | |
1737 | + if(device >= nd){ //test for the existence of the requested device | |
1738 | + std::cout<<"ERROR: requested CUDA device for stim::envi::fft() doesn't exist"<<std::endl; | |
1739 | + exit(1); | |
1740 | + } | |
1741 | + HANDLE_ERROR(cudaSetDevice(device)); //set the CUDA device | |
1742 | + cudaDeviceProp prop; | |
1743 | + HANDLE_ERROR(cudaGetDeviceProperties(&prop, device)); //get the CUDA device properties | |
1744 | + | |
1745 | + size_t B = Z(); | |
1746 | + size_t S = samples; | |
1747 | + size_t fft_size = S * sizeof(T); //number of bytes for each FFT | |
1748 | + size_t cuda_bytes = prop.totalGlobalMem; //get the number of bytes of global memory available | |
1749 | + size_t cuda_use = (size_t)floor(cuda_bytes * 0.2); //only use 80% | |
1750 | + size_t nS = cuda_use / fft_size; //calculate the number of spectra that can be loaded onto the GPU as a single batch | |
1751 | + size_t batch_bytes = nS * fft_size; //calculate the size of a batch (in bytes) | |
1752 | + size_t fft_bytes = nS * (S/2 + 1) * sizeof(cufftComplex); | |
1753 | + T* batch = (T*) malloc(batch_bytes); //allocate space in host memory to store a batch | |
1754 | + memset(batch, 0, batch_bytes); | |
1755 | + std::complex<T>* batch_fft = (std::complex<T>*) malloc(fft_bytes); | |
1756 | + T* gpu_batch; //device pointer to the batch | |
1757 | + HANDLE_ERROR(cudaMalloc(&gpu_batch, batch_bytes)); //allocate space on the device for the FFT batch | |
1758 | + cufftComplex* gpu_batch_fft; //allocate space for the FFT result | |
1759 | + HANDLE_ERROR(cudaMalloc(&gpu_batch_fft, fft_bytes)); | |
1760 | + int N[1]; //create an array with the interferogram size (required for cuFFT input) | |
1761 | + N[0] = (int)S; //set the only array value to the interferogram size | |
1762 | + | |
1763 | + //if a background is provided for a ratio | |
1764 | + std::complex<T>* ratio_fft = NULL; //create a pointer for the FFT of the ratio image (if it exists) | |
1765 | + if(ratio){ | |
1766 | + size_t bkg_bytes = rx * ry * S * sizeof(T); //calculate the total number of bytes in the background image | |
1767 | + T* bkg_copy = (T*) malloc(bkg_bytes); //allocate space to copy the background | |
1768 | + if(S == Z()) memcpy(bkg_copy, ratio, bkg_bytes); //if the number of samples used in processing equals the number of available samples | |
1769 | + else{ | |
1770 | + for(size_t xyi = 0; xyi < rx*ry; xyi++) | |
1771 | + memcpy(&bkg_copy[xyi * S], &ratio[xyi * B], S * sizeof(T)); | |
1772 | + } | |
1773 | + T* gpu_ratio; | |
1774 | + HANDLE_ERROR(cudaMalloc(&gpu_ratio, bkg_bytes)); | |
1775 | + HANDLE_ERROR(cudaMemcpy(gpu_ratio, bkg_copy, bkg_bytes, cudaMemcpyHostToDevice)); | |
1776 | + cufftHandle bkg_plan; | |
1777 | + CUFFT_HANDLE_ERROR(cufftPlanMany(&bkg_plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)(rx * ry))); | |
1778 | + size_t bkg_fft_bytes = rx * ry * (S / 2 + 1) * sizeof(cufftComplex); | |
1779 | + T* gpu_ratio_fft; | |
1780 | + HANDLE_ERROR(cudaMalloc(&gpu_ratio_fft, bkg_fft_bytes)); | |
1781 | + CUFFT_HANDLE_ERROR(cufftExecR2C(bkg_plan, (cufftReal*)gpu_ratio, (cufftComplex*)gpu_ratio_fft)); | |
1782 | + ratio_fft = (std::complex<T>*) malloc(bkg_fft_bytes); | |
1783 | + HANDLE_ERROR(cudaMemcpy(ratio_fft, gpu_ratio_fft, bkg_fft_bytes, cudaMemcpyDeviceToHost)); | |
1784 | + HANDLE_ERROR(cudaFree(gpu_ratio)); | |
1785 | + HANDLE_ERROR(cudaFree(gpu_ratio_fft)); | |
1786 | + CUFFT_HANDLE_ERROR(cufftDestroy(bkg_plan)); | |
1787 | + } | |
1685 | 1788 | |
1789 | + cufftHandle plan; //create a CUFFT plan | |
1790 | + CUFFT_HANDLE_ERROR(cufftPlanMany(&plan, 1, N, NULL, 1, N[0], NULL, 1, N[0], CUFFT_R2C, (int)nS)); | |
1791 | + | |
1792 | + std::ofstream outfile(outname, std::ios::binary); //open a file for writing | |
1793 | + | |
1794 | + size_t XY = X() * Y(); //calculate the number of spectra | |
1795 | + size_t xy = 0; | |
1796 | + size_t bs; //stores the number of spectra in the current batch | |
1797 | + size_t s, b; | |
1798 | + size_t S_fft = S/2 + 1; | |
1799 | + size_t bandkeep = bandmax - bandmin + 1; | |
1800 | + size_t x, y; | |
1801 | + size_t ratio_i; | |
1802 | + T* temp_spec = (T*) malloc(Z() * sizeof(T)); //allocate space to hold a single pixel | |
1803 | + while(xy < XY){ //while there are unprocessed spectra | |
1804 | + bs = min(XY - xy, nS); //calculate the number of spectra to include in the batch | |
1805 | + for(s = 0; s < bs; s++){ //for each spectrum in the batch | |
1806 | + pixel(temp_spec, xy + s); //read a pixel from disk | |
1807 | + memcpy(&batch[s * S], temp_spec, S * sizeof(T)); | |
1808 | + //pixel(&batch[s * S], xy + s); //read the next spectrum | |
1809 | + } | |
1810 | + HANDLE_ERROR(cudaMemcpy(gpu_batch, batch, batch_bytes, cudaMemcpyHostToDevice)); | |
1811 | + CUFFT_HANDLE_ERROR(cufftExecR2C(plan, (cufftReal*)gpu_batch, gpu_batch_fft)); //execute the (implicitly forward) transform | |
1812 | + HANDLE_ERROR(cudaMemcpy(batch_fft, gpu_batch_fft, fft_bytes, cudaMemcpyDeviceToHost)); //copy the data back to the GPU | |
1813 | + for(s = 0; s < bs; s++){ //for each spectrum in the batch | |
1814 | + y = (xy + s)/X(); | |
1815 | + x = xy + s - y * X(); | |
1816 | + if(ratio_fft) ratio_i = (y % ry) * rx + (x % rx); //if a background is used, calculate the coordinates into it | |
1817 | + for(b = 0; b < S/2 + 1; b++){ //for each sample | |
1818 | + if(ratio_fft) | |
1819 | + batch[s * S + b] = -log(abs(batch_fft[s * S_fft + b]) / abs(ratio_fft[ratio_i * S_fft + b])); | |
1820 | + else | |
1821 | + batch[s * S + b] = abs(batch_fft[s * S_fft + b]); //calculate the magnitude of the spectrum | |
1822 | + } | |
1823 | + outfile.write((char*)&batch[s * S + bandmin], bandkeep * sizeof(T)); //save the resulting spectrum | |
1824 | + } | |
1825 | + xy += bs; //increment xy by the number of spectra processed | |
1826 | + if(PROGRESS) progress = (double)xy / (double)XY * 100; | |
1827 | + } | |
1828 | + outfile.close(); | |
1829 | + free(ratio_fft); | |
1830 | + free(batch_fft); | |
1831 | + free(batch); | |
1832 | + HANDLE_ERROR(cudaFree(gpu_batch)); | |
1833 | + HANDLE_ERROR(cudaFree(gpu_batch_fft)); | |
1834 | + return 0; | |
1835 | + } | |
1686 | 1836 | |
1687 | 1837 | /// Close the file. |
1688 | 1838 | bool close(){ | ... | ... |
stim/envi/bsq.h
... | ... | @@ -104,6 +104,7 @@ public: |
104 | 104 | //if wavelength is smaller than the first one in header file |
105 | 105 | if ( w[page] > wavelength ){ |
106 | 106 | band_index(p, page); |
107 | + if(PROGRESS) progress = 100; | |
107 | 108 | return true; |
108 | 109 | } |
109 | 110 | |
... | ... | @@ -114,6 +115,7 @@ public: |
114 | 115 | // (the wavelength is out of bounds) |
115 | 116 | if (page == Z()) { |
116 | 117 | band_index(p, Z()-1); //return the last band |
118 | + if(PROGRESS) progress = 100; | |
117 | 119 | return true; |
118 | 120 | } |
119 | 121 | } |
... | ... | @@ -561,12 +563,12 @@ public: |
561 | 563 | free(src[1]); |
562 | 564 | free(dst[0]); |
563 | 565 | free(dst[1]); |
564 | - //if(VERBOSE){ | |
566 | + if(VERBOSE){ | |
565 | 567 | std::cout<<"total time to execute bsq::bip(): "<<t_total<<" ms"<<std::endl; |
566 | 568 | std::cout<<" total time spent processing: "<<pt_total<<" ms"<<std::endl; |
567 | 569 | std::cout<<" total time spent reading: "<<rt_total<<" ms"<<std::endl; |
568 | 570 | std::cout<<" total time spent writing: "<<wt_total<<" ms"<<std::endl; |
569 | - //} | |
571 | + } | |
570 | 572 | return true; //return true |
571 | 573 | } |
572 | 574 | |
... | ... | @@ -1120,27 +1122,61 @@ public: |
1120 | 1122 | |
1121 | 1123 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum |
1122 | 1124 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1123 | - bool avg_band(double* p, unsigned char* mask = NULL, bool PROGRESS = false){ | |
1125 | + bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){ | |
1124 | 1126 | unsigned long long XY = X() * Y(); |
1125 | - unsigned long long count = 0; //count will store the number of masked pixels | |
1127 | + unsigned long long count = nnz(mask); //count will store the number of masked pixels | |
1126 | 1128 | T* temp = (T*)malloc(sizeof(T) * XY); |
1127 | - //calculate this loop counts the number of true pixels in the mask | |
1128 | - for (unsigned j = 0; j < XY; j++){ | |
1129 | - if (mask == NULL || mask[j] != 0){ | |
1130 | - count++; | |
1131 | - } | |
1132 | - } | |
1129 | + | |
1133 | 1130 | //this loops goes through each band in B (Z()) |
1134 | 1131 | // masked (or valid) pixels from that band are averaged and the average is stored in p |
1132 | + double e_x; //stores E[x]^2 | |
1133 | + double e_x2; //stores E[x^2] | |
1134 | + double x; | |
1135 | 1135 | for (unsigned long long i = 0; i < Z(); i++){ |
1136 | - p[i] = 0; | |
1136 | + e_x = 0; | |
1137 | + e_x2 = 0; | |
1137 | 1138 | band_index(temp, i); //get the band image and store it in temp |
1138 | 1139 | for (unsigned long long j = 0; j < XY; j++){ //loop through temp, averaging valid pixels |
1139 | 1140 | if (mask == NULL || mask[j] != 0){ |
1140 | - p[i] += (double)temp[j] / (double)count; | |
1141 | + x = (double)temp[j]; | |
1142 | + e_x += x / (double)count; //sum the expected value of x | |
1143 | + e_x2 += (x * x) / (double)count; //sum the expected value of x^2 | |
1141 | 1144 | } |
1142 | 1145 | } |
1143 | - if(PROGRESS) progress = (double)(i+1) / Z() * 100; | |
1146 | + m[i] = e_x; //store the mean | |
1147 | + std[i] = sqrt(e_x2 - e_x * e_x); //calculate the standard deviation | |
1148 | + if(PROGRESS) progress = (double)(i+1) / Z() * 100; //update the progress counter | |
1149 | + } | |
1150 | + free(temp); | |
1151 | + return true; | |
1152 | + } | |
1153 | + | |
1154 | + /// Calculate the median value for all masked (or valid) pixels in a band and returns the median spectrum | |
1155 | + | |
1156 | + /// @param m is a pointer to pre-allocated memory of size [B * sizeof(double)] that stores the median spectrum | |
1157 | + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | |
1158 | + bool median_spectrum(double* m, unsigned char* mask = NULL, bool PROGRESS = false){ | |
1159 | + size_t XY = X() * Y(); | |
1160 | + size_t count = nnz(mask); //count will store the number of masked pixels | |
1161 | + T* temp = (T*)malloc(sizeof(T) * XY); | |
1162 | + | |
1163 | + std::vector<T> band_values(count); //create an STD vector of band values | |
1164 | + | |
1165 | + //this loops goes through each band in B (Z()) | |
1166 | + // masked (or valid) pixels from that band are averaged and the average is stored in p | |
1167 | + size_t k; | |
1168 | + for (size_t i = 0; i < Z(); i++){ //for each band | |
1169 | + band_index(temp, i); //get the band image and store it in temp | |
1170 | + k = 0; //initialize the band_value index to zero | |
1171 | + for (size_t j = 0; j < XY; j++){ //loop through temp, averaging valid pixels | |
1172 | + if (mask == NULL || mask[j] != 0){ | |
1173 | + band_values[k] = temp[j]; //store the value in the band_values array | |
1174 | + k++; //increment the band_values index | |
1175 | + } | |
1176 | + } | |
1177 | + std::sort(band_values.begin(), band_values.end()); //sort all of the values in the band | |
1178 | + m[i] = band_values[ count/2 ]; //store the center value in the array | |
1179 | + if(PROGRESS) progress = (double)(i+1) / Z() * 100; //update the progress counter | |
1144 | 1180 | } |
1145 | 1181 | free(temp); |
1146 | 1182 | return true; |
... | ... | @@ -1203,6 +1239,52 @@ public: |
1203 | 1239 | return true; |
1204 | 1240 | } |
1205 | 1241 | |
1242 | + ///Crop out several subimages and assemble a new image from these concatenated subimages | |
1243 | + | |
1244 | + /// @param outfile is the file name for the output image | |
1245 | + /// @param sx is the width of each subimage | |
1246 | + /// @param sy is the height of each subimage | |
1247 | + /// @mask is the mask used to define subimage positions extracted from the input file | |
1248 | + void subimages(std::string outfile, size_t sx, size_t sy, unsigned char* mask, bool PROGRESS = false){ | |
1249 | + | |
1250 | + size_t N = nnz(mask); //get the number of subimages | |
1251 | + T* dst = (T*) malloc(N * sx * sy * sizeof(T)); //allocate space for a single band of the output image | |
1252 | + memset(dst, 0, N*sx*sy*sizeof(T)); //initialize the band image to zero | |
1253 | + | |
1254 | + std::ofstream out(outfile, std::ios::binary); //open a file for writing | |
1255 | + | |
1256 | + T* src = (T*) malloc(X() * Y() * sizeof(T)); | |
1257 | + | |
1258 | + for(size_t b = 0; b < Z(); b++){ //for each band | |
1259 | + band_index(src, b); //load the band image | |
1260 | + size_t i = 0; //create an image index and initialize it to zero | |
1261 | + size_t n = 0; | |
1262 | + while(n < N){ //for each subimage | |
1263 | + if(mask[i]){ //if the pixel is masked, copy the surrounding pixels into the destination band | |
1264 | + size_t yi = i / X(); //determine the y position of the current pixel | |
1265 | + size_t xi = i - yi * X(); //determine the x position of the current pixel | |
1266 | + if( xi > sx/2 && xi < X() - sx/2 && //if the subimage is completely within the bounds of the original image | |
1267 | + yi > sy/2 && yi < Y() - sy/2){ | |
1268 | + size_t cx = xi - sx/2; //calculate the corner position for the subimage | |
1269 | + size_t cy = yi - sy/2; | |
1270 | + for(size_t syi = 0; syi < sy; syi++){ //for each line in the subimage | |
1271 | + size_t src_i = (cy + syi) * X() + cx; | |
1272 | + //size_t dst_i = syi * (N * sx) + n * sx; | |
1273 | + size_t dst_i = (n * sy + syi) * sx; | |
1274 | + memcpy(&dst[dst_i], &src[src_i], sx * sizeof(T)); //copy one line from the subimage to the destination image | |
1275 | + } | |
1276 | + n++; | |
1277 | + } | |
1278 | + } | |
1279 | + i++; | |
1280 | + if(PROGRESS) progress = (double)( (n+1) * (b+1) ) / (N * Z()) * 100; | |
1281 | + }//end while n | |
1282 | + out.write((const char*)dst, N * sx * sy * sizeof(T)); //write the band to memory | |
1283 | + } | |
1284 | + free(dst); //free memory | |
1285 | + free(src); | |
1286 | + } | |
1287 | + | |
1206 | 1288 | /// Remove a list of bands from the ENVI file |
1207 | 1289 | |
1208 | 1290 | /// @param outfile is the file name for the output hyperspectral image (with trimmed bands) | ... | ... |
stim/envi/envi.h
... | ... | @@ -6,6 +6,8 @@ |
6 | 6 | #include "../envi/bip.h" |
7 | 7 | #include "../envi/bil.h" |
8 | 8 | #include "../math/fd_coefficients.h" |
9 | +#include <stim/parser/filename.h> | |
10 | +#include <stim/util/filesize.h> | |
9 | 11 | #include <iostream> |
10 | 12 | #include <fstream> |
11 | 13 | //#include "../image/image.h" |
... | ... | @@ -76,7 +78,31 @@ public: |
76 | 78 | |
77 | 79 | allocate(); |
78 | 80 | } |
81 | + //used to test if the current ENVI file is valid | |
82 | + operator bool(){ | |
83 | + if(file == NULL) return false; | |
84 | + return true; | |
85 | + } | |
86 | + | |
87 | + //test to determine if the specified file is an ENVI file | |
88 | + static bool is_envi(std::string fname, std::string hname = ""){ | |
89 | + stim::filename data_file(fname); | |
90 | + stim::filename header_file; | |
91 | + if(hname == ""){ //if the header isn't provided | |
92 | + header_file = data_file; //assume that it's the same name as the data file, with a .hdr extension | |
93 | + header_file = header_file.extension("hdr"); | |
94 | + } | |
95 | + else header_file = hname; //otherwise load the passed header | |
96 | + | |
97 | + stim::envi_header H; | |
98 | + if(H.load(header_file) == false) //load the header file, if it doesn't load return false | |
99 | + return false; | |
100 | + size_t targetBytes = H.data_bytes(); //get the number of bytes that SHOULD be in the data file | |
101 | + size_t bytes = stim::file_size(fname); | |
102 | + if(bytes != targetBytes) return false; //if the data doesn't match the header, return false | |
103 | + return true; //otherwise everything looks fine | |
79 | 104 | |
105 | + } | |
80 | 106 | |
81 | 107 | |
82 | 108 | void* malloc_spectrum(){ |
... | ... | @@ -359,11 +385,23 @@ public: |
359 | 385 | |
360 | 386 | fseek(f, 9, SEEK_SET); //seek to the number of bands |
361 | 387 | short b; //allocate space for the number of bands |
362 | - fread(&b, sizeof(short), 1, f); //read the number of bands | |
388 | + size_t nread = fread(&b, sizeof(short), 1, f); //read the number of bands | |
389 | + if(nread != 1){ | |
390 | + std::cout<<"Error reading band number from Agilent file."<<std::endl; | |
391 | + exit(1); | |
392 | + } | |
363 | 393 | fseek(f, 13, SEEK_CUR); //skip the the x and y dimensions |
364 | 394 | short x, y; |
365 | - fread(&x, sizeof(short), 1, f); //read the image x and y size | |
366 | - fread(&y, sizeof(short), 1, f); | |
395 | + nread = fread(&x, sizeof(short), 1, f); //read the image x and y size | |
396 | + if(nread != 1){ | |
397 | + std::cout<<"Error reading X dimension from Agilent file."<<std::endl; | |
398 | + exit(1); | |
399 | + } | |
400 | + nread = fread(&y, sizeof(short), 1, f); | |
401 | + if(nread != 1){ | |
402 | + std::cout<<"Error reading Y dimension from Agilent file."<<std::endl; | |
403 | + exit(1); | |
404 | + } | |
367 | 405 | fclose(f); //close the file |
368 | 406 | |
369 | 407 | //store the information from the Agilent header in the ENVI header |
... | ... | @@ -1368,12 +1406,12 @@ public: |
1368 | 1406 | |
1369 | 1407 | /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum |
1370 | 1408 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1371 | - bool avg_band(double * p, unsigned char* mask, bool PROGRESS = false){ | |
1409 | + bool mean_spectrum(double * p, double* std, unsigned char* mask, bool PROGRESS = false){ | |
1372 | 1410 | if (header.interleave == envi_header::BSQ){ |
1373 | 1411 | if (header.data_type == envi_header::float32) |
1374 | - return ((bsq<float>*)file)->avg_band(p, mask, PROGRESS); | |
1412 | + return ((bsq<float>*)file)->mean_spectrum(p, std, mask, PROGRESS); | |
1375 | 1413 | else if (header.data_type == envi_header::float64) |
1376 | - return ((bsq<double>*)file)->avg_band(p, mask, PROGRESS); | |
1414 | + return ((bsq<double>*)file)->mean_spectrum(p, std, mask, PROGRESS); | |
1377 | 1415 | else{ |
1378 | 1416 | std::cout << "ERROR: unidentified data type" << std::endl; |
1379 | 1417 | exit(1); |
... | ... | @@ -1381,9 +1419,9 @@ public: |
1381 | 1419 | } |
1382 | 1420 | else if (header.interleave == envi_header::BIL){ |
1383 | 1421 | if (header.data_type == envi_header::float32) |
1384 | - return ((bil<float>*)file)->avg_band(p, mask, PROGRESS); | |
1422 | + return ((bil<float>*)file)->mean_spectrum(p, std, mask, PROGRESS); | |
1385 | 1423 | else if (header.data_type == envi_header::float64) |
1386 | - return ((bil<double>*)file)->avg_band(p, mask, PROGRESS); | |
1424 | + return ((bil<double>*)file)->mean_spectrum(p, std, mask, PROGRESS); | |
1387 | 1425 | else{ |
1388 | 1426 | std::cout << "ERROR: unidentified data type" << std::endl; |
1389 | 1427 | exit(1); |
... | ... | @@ -1391,14 +1429,36 @@ public: |
1391 | 1429 | } |
1392 | 1430 | else if (header.interleave == envi_header::BIP){ |
1393 | 1431 | if (header.data_type == envi_header::float32) |
1394 | - return ((bip<float>*)file)->avg_band(p, mask, PROGRESS); | |
1432 | + return ((bip<float>*)file)->mean_spectrum(p, std, mask, PROGRESS); | |
1433 | + else if (header.data_type == envi_header::float64) | |
1434 | + return ((bip<double>*)file)->mean_spectrum(p, std, mask, PROGRESS); | |
1435 | + else{ | |
1436 | + std::cout << "ERROR: unidentified data type" << std::endl; | |
1437 | + exit(1); | |
1438 | + } | |
1439 | + } | |
1440 | + return false; | |
1441 | + } | |
1442 | + | |
1443 | + /// Calculate the mean value for all masked (or valid) pixels in a band and returns the average spectrum | |
1444 | + | |
1445 | + /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum | |
1446 | + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | |
1447 | + bool median_spectrum(double* m, unsigned char* mask, bool PROGRESS = false){ | |
1448 | + if (header.interleave == envi_header::BSQ){ | |
1449 | + if (header.data_type == envi_header::float32) | |
1450 | + return ((bsq<float>*)file)->median_spectrum(m, mask, PROGRESS); | |
1395 | 1451 | else if (header.data_type == envi_header::float64) |
1396 | - return ((bip<double>*)file)->avg_band(p, mask, PROGRESS); | |
1452 | + return ((bsq<double>*)file)->median_spectrum(m, mask, PROGRESS); | |
1397 | 1453 | else{ |
1398 | 1454 | std::cout << "ERROR: unidentified data type" << std::endl; |
1399 | 1455 | exit(1); |
1400 | 1456 | } |
1401 | 1457 | } |
1458 | + else{ | |
1459 | + std::cout<<"ERROR: median calculation is only supported for BSQ interleave types. Convert to process."<<std::endl; | |
1460 | + exit(1); | |
1461 | + } | |
1402 | 1462 | return false; |
1403 | 1463 | } |
1404 | 1464 | |
... | ... | @@ -1407,16 +1467,16 @@ public: |
1407 | 1467 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1408 | 1468 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1409 | 1469 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1410 | - bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){ | |
1470 | + bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){ | |
1411 | 1471 | if (header.interleave == envi_header::BSQ){ |
1412 | 1472 | std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl; |
1413 | 1473 | exit(1); |
1414 | 1474 | } |
1415 | 1475 | else if (header.interleave == envi_header::BIL){ |
1416 | 1476 | if (header.data_type == envi_header::float32) |
1417 | - return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1477 | + return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1418 | 1478 | else if (header.data_type == envi_header::float64) |
1419 | - return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1479 | + return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1420 | 1480 | else{ |
1421 | 1481 | std::cout << "ERROR: unidentified data type" << std::endl; |
1422 | 1482 | exit(1); |
... | ... | @@ -1424,9 +1484,9 @@ public: |
1424 | 1484 | } |
1425 | 1485 | else if (header.interleave == envi_header::BIP){ |
1426 | 1486 | if (header.data_type == envi_header::float32) |
1427 | - return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1487 | + return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1428 | 1488 | else if (header.data_type == envi_header::float64) |
1429 | - return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1489 | + return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1430 | 1490 | else{ |
1431 | 1491 | std::cout << "ERROR: unidentified data type" << std::endl; |
1432 | 1492 | exit(1); |
... | ... | @@ -1440,7 +1500,7 @@ public: |
1440 | 1500 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1441 | 1501 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1442 | 1502 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1443 | - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){ | |
1503 | + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){ | |
1444 | 1504 | if (header.interleave == envi_header::BSQ){ |
1445 | 1505 | std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl; |
1446 | 1506 | exit(1); |
... | ... | @@ -1454,9 +1514,9 @@ public: |
1454 | 1514 | |
1455 | 1515 | else if (header.interleave == envi_header::BIP){ |
1456 | 1516 | if (header.data_type == envi_header::float32) |
1457 | - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); | |
1517 | + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS); | |
1458 | 1518 | else if (header.data_type == envi_header::float64) |
1459 | - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); | |
1519 | + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS); | |
1460 | 1520 | else{ |
1461 | 1521 | std::cout << "ERROR: unidentified data type" << std::endl; |
1462 | 1522 | exit(1); |
... | ... | @@ -1524,6 +1584,41 @@ public: |
1524 | 1584 | return false; |
1525 | 1585 | } |
1526 | 1586 | |
1587 | + void subimages(std::string outfile, size_t nx, size_t ny, unsigned char* mask, bool PROGRESS = false){ | |
1588 | + | |
1589 | + size_t nnz = 0; //initialize the number of subimages to zero | |
1590 | + for(size_t i = 0; i < header.lines * header.samples; i++) //for each pixel in the mask | |
1591 | + if(mask[i]) nnz++; //if the pixel is valid, add a subimage | |
1592 | + | |
1593 | + | |
1594 | + //save the header for the cropped file | |
1595 | + stim::envi_header new_header = header; | |
1596 | + new_header.samples = nx; //calculate the width of the output image (concatenated subimages) | |
1597 | + new_header.lines = nnz * ny; //calculate the height of the output image (height of subimages) | |
1598 | + | |
1599 | + | |
1600 | + if (header.interleave == envi_header::BSQ){ | |
1601 | + if (header.data_type == envi_header::float32) | |
1602 | + ((bsq<float>*)file)->subimages(outfile, nx, ny, mask, PROGRESS); | |
1603 | + else if (header.data_type == envi_header::float64) | |
1604 | + ((bsq<double>*)file)->subimages(outfile, nx, ny, mask, PROGRESS); | |
1605 | + else{ | |
1606 | + std::cout << "ERROR: unidentified data type" << std::endl; | |
1607 | + exit(1); | |
1608 | + } | |
1609 | + } | |
1610 | + else if (header.interleave == envi_header::BIL){ | |
1611 | + std::cout << "ERROR: unidentified data type" << std::endl; | |
1612 | + exit(1); | |
1613 | + } | |
1614 | + else if (header.interleave == envi_header::BIP){ | |
1615 | + std::cout << "ERROR: unidentified data type" << std::endl; | |
1616 | + exit(1); | |
1617 | + } | |
1618 | + | |
1619 | + new_header.save(outfile + ".hdr"); //save the header for the output file | |
1620 | + } | |
1621 | + | |
1527 | 1622 | /// Remove a list of bands from the ENVI file |
1528 | 1623 | |
1529 | 1624 | /// @param outfile is the file name for the output hyperspectral image (with trimmed bands) |
... | ... | @@ -1801,6 +1896,44 @@ public: |
1801 | 1896 | } |
1802 | 1897 | exit(1); |
1803 | 1898 | } |
1899 | + | |
1900 | + | |
1901 | + | |
1902 | + | |
1903 | + void fft(std::string outfile, double band_min, double band_max, size_t samples = 0, void* ratio = NULL, size_t rx = 0, size_t ry = 0, bool PROGRESS = false, int cuda_device = 0){ | |
1904 | + if(samples == 0) samples = header.bands; | |
1905 | + double B = (double)header.bands; | |
1906 | + double delta = header.wavelength[1] - header.wavelength[0]; //calculate spacing in the current domain | |
1907 | + double span = samples * delta; //calculate the span in the current domain | |
1908 | + double fft_delta = 1.0 / span; //calculate the span in the FFT domain | |
1909 | + double fft_max = fft_delta * samples/2; //calculate the maximum range of the FFT | |
1910 | + | |
1911 | + if(band_max > fft_max) band_max = fft_max; //the user gave a band outside of the FFT range, reset the band to the maximum available | |
1912 | + size_t start_i = (size_t)std::ceil(band_min / fft_delta); //calculate the first band to store | |
1913 | + size_t size_i = (size_t)std::floor(band_max / fft_delta) - start_i + 1; //calculate the number of bands to store | |
1914 | + size_t end_i = start_i + size_i - 1; //last band number | |
1915 | + | |
1916 | + envi_header new_header = header; | |
1917 | + new_header.bands = size_i; | |
1918 | + new_header.set_wavelengths(start_i * fft_delta, fft_delta); | |
1919 | + new_header.wavelength_units = "inv_" + header.wavelength_units; | |
1920 | + new_header.save(outfile + ".hdr"); | |
1921 | + | |
1922 | + if (header.interleave == envi_header::BIP){ | |
1923 | + if (header.data_type == envi_header::float32) | |
1924 | + ((bip<float>*)file)->fft(outfile, start_i, end_i, samples, (float*)ratio, rx, ry, PROGRESS, cuda_device); | |
1925 | + else if (header.data_type == envi_header::float64) | |
1926 | + ((bip<double>*)file)->fft(outfile, start_i, end_i, samples, (double*)ratio, rx, ry, PROGRESS, cuda_device); | |
1927 | + else{ | |
1928 | + std::cout << "ERROR: unidentified data type" << std::endl; | |
1929 | + exit(1); | |
1930 | + } | |
1931 | + } | |
1932 | + else{ | |
1933 | + std::cout<<"ERROR: only BIP files supported for FFT"<<std::endl; | |
1934 | + exit(1); | |
1935 | + } | |
1936 | + } | |
1804 | 1937 | }; //end ENVI |
1805 | 1938 | |
1806 | 1939 | } //end namespace rts | ... | ... |
stim/envi/envi_header.h
... | ... | @@ -78,6 +78,14 @@ struct envi_header |
78 | 78 | load(name); |
79 | 79 | } |
80 | 80 | |
81 | + //sets the wavelength vector given a starting value and uniform step size | |
82 | + void set_wavelengths(double start, double step){ | |
83 | + size_t B = bands; //get the number of bands | |
84 | + wavelength.resize(B); | |
85 | + for(size_t b = 0; b < B; b++) | |
86 | + wavelength[b] = start + b * step; | |
87 | + } | |
88 | + | |
81 | 89 | std::string trim(std::string line){ |
82 | 90 | |
83 | 91 | if(line.length() == 0) |
... | ... | @@ -417,8 +425,13 @@ struct envi_header |
417 | 425 | default: |
418 | 426 | return 0; |
419 | 427 | } |
428 | + } | |
420 | 429 | |
430 | + //return the number of bytes that SHOULD be in the data file | |
431 | + size_t data_bytes(){ | |
432 | + return samples * lines * bands * valsize() + header_offset; | |
421 | 433 | } |
434 | + | |
422 | 435 | |
423 | 436 | /// Convert an interleave type to a string |
424 | 437 | static std::string interleave_str(interleaveType t){ | ... | ... |
stim/envi/hsi.h
... | ... | @@ -142,7 +142,7 @@ public: |
142 | 142 | void mask_finite(unsigned char* out_mask, unsigned char* mask, bool PROGRESS = false){ |
143 | 143 | size_t XY = X() * Y(); |
144 | 144 | if(mask == NULL) //if no mask is provided |
145 | - memset(mask, 255, XY * sizeof(unsigned char)); //initialize the mask to 255 | |
145 | + memset(out_mask, 255, XY * sizeof(unsigned char)); //initialize the mask to 255 | |
146 | 146 | else //if a mask is provided |
147 | 147 | memcpy(out_mask, mask, XY * sizeof(unsigned char)); //initialize the current mask to that one |
148 | 148 | T* page = (T*)malloc(R[0] * R[1] * sizeof(T)); //allocate space for a page of data | ... | ... |
stim/gl/error.h
stim/gl/gl_spider.h
... | ... | @@ -479,7 +479,7 @@ class gl_spider // : public virtual gl_texture<T> |
479 | 479 | glEndList(); ///finilize the display list. |
480 | 480 | #ifdef DEBUG |
481 | 481 | for(int i = 0; i < numSamplesPos; i++) |
482 | - std::cout << pV[i] << std::endl; | |
482 | + std::cout << pV[i].str() << std::endl; | |
483 | 483 | #endif |
484 | 484 | } |
485 | 485 | |
... | ... | @@ -1151,8 +1151,8 @@ class gl_spider // : public virtual gl_texture<T> |
1151 | 1151 | out[3] = temp[2]; |
1152 | 1152 | } |
1153 | 1153 | #ifdef DEBUG |
1154 | -// std::cout << "out is " << out << std::endl; | |
1155 | -// std::cout << "when rotating from " << from << " to " << dir << std::endl; | |
1154 | + std::cout << "out is " << out.str() << std::endl; | |
1155 | + std::cout << "when rotating from " << from.str() << " to " << dir.str() << std::endl; | |
1156 | 1156 | #endif |
1157 | 1157 | return out; |
1158 | 1158 | } |
... | ... | @@ -1545,7 +1545,7 @@ class gl_spider // : public virtual gl_texture<T> |
1545 | 1545 | setMagnitude(curSeedMag); |
1546 | 1546 | |
1547 | 1547 | #ifdef DEBUG |
1548 | - std::cout << "The new seed " << curSeed << curSeedVec << curSeedMag << std::endl; | |
1548 | + std::cout << "The new seed " << curSeed.str() << curSeedVec.str() << curSeedMag << std::endl; | |
1549 | 1549 | #endif |
1550 | 1550 | |
1551 | 1551 | // Bind(direction_texID, direction_buffID, numSamples, n_pixels); | ... | ... |
stim/grids/image_stack.h
stim/image/image.h
... | ... | @@ -10,6 +10,7 @@ |
10 | 10 | #include <limits> |
11 | 11 | #include <typeinfo> |
12 | 12 | #include <fstream> |
13 | +#include <cstring> | |
13 | 14 | |
14 | 15 | namespace stim{ |
15 | 16 | /// This static class provides the STIM interface for loading, saving, and storing 2D images. |
... | ... | @@ -74,18 +75,7 @@ class image{ |
74 | 75 | #endif |
75 | 76 | /// Returns the value for "white" based on the dynamic range (assumes white is 1.0 for floating point images) |
76 | 77 | T white(){ |
77 | - | |
78 | - if(typeid(T) == typeid(unsigned char)) return UCHAR_MAX; | |
79 | - if(typeid(T) == typeid(unsigned short)) return SHRT_MAX; | |
80 | - if(typeid(T) == typeid(unsigned)) return UINT_MAX; | |
81 | - if(typeid(T) == typeid(unsigned long)) return ULONG_MAX; | |
82 | - if(typeid(T) == typeid(unsigned long long)) return ULLONG_MAX; | |
83 | - if(typeid(T) == typeid(float)) return 1.0f; | |
84 | - if(typeid(T) == typeid(double)) return 1.0; | |
85 | - | |
86 | - std::cout<<"ERROR in stim::image::white - no white value known for this data type"<<std::endl; | |
87 | - exit(1); | |
88 | - | |
78 | + return std::numeric_limits<T>::max(); | |
89 | 79 | } |
90 | 80 | |
91 | 81 | ... | ... |
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
1 | +#ifndef RTS_BESSEL_H | |
2 | +#define RTS_BESSEL_H | |
3 | + | |
4 | +#define _USE_MATH_DEFINES | |
5 | +#include <math.h> | |
6 | +#include "../math/complex.h" | |
7 | +#define eps 1e-15 | |
8 | +#define el 0.5772156649015329 | |
9 | + | |
10 | + | |
11 | +namespace stim{ | |
12 | + | |
13 | +static complex<double> cii(0.0,1.0); | |
14 | +static complex<double> cone(1.0,0.0); | |
15 | +static complex<double> czero(0.0,0.0); | |
16 | + | |
//
//  Gamma function of a real argument.
//
//  INPUT:
//      P x  -- argument
//
//  RETURN:
//      gamma(x); 1e308 is returned as an overflow flag when x > 171
//      or when x is zero or a negative integer.
//
//  Positive integers use the factorial product; all other arguments
//  evaluate a 25-term polynomial on the fractional part and, when
//  |x| > 1, scale the result back up (with a reflection for x < 0).
//
template< typename P >
P gamma(P x)
{
    // polynomial coefficients, lowest order first
    static P series[] = {
        1.0,
        0.5772156649015329,
       -0.6558780715202538,
       -0.420026350340952e-1,
        0.1665386113822915,
       -0.421977345555443e-1,
       -0.9621971527877e-2,
        0.7218943246663e-2,
       -0.11651675918591e-2,
       -0.2152416741149e-3,
        0.1280502823882e-3,
       -0.201348547807e-4,
       -0.12504934821e-5,
        0.1133027232e-5,
       -0.2056338417e-6,
        0.6116095e-8,
        0.50020075e-8,
       -0.11812746e-8,
        0.1043427e-9,
        0.77823e-11,
       -0.36968e-11,
        0.51e-12,
       -0.206e-13,
       -0.54e-14,
        0.14e-14};

    if (x > 171.0) return 1e308;            // overflow flag

    // integer arguments: (x-1)! for positive x, overflow flag otherwise
    if (x == (int)x) {
        if (x > 0.0) {
            P factorial = 1.0;
            int term = 2;
            while (term < x) {
                factorial *= term;
                term++;
            }
            return factorial;
        }
        return 1e308;
    }

    // non-integer arguments: reduce to the fractional part when |x| > 1
    const bool beyond_unit = fabs(x) > 1.0;
    P arg = x;
    P shift = 1.0;                          // product (|x|-1)(|x|-2)... removed by the reduction
    if (beyond_unit) {
        arg = fabs(x);
        const int whole = (int)arg;
        for (int j = 1; j <= whole; j++) {
            shift *= (arg-j);
        }
        arg -= whole;
    }

    // Horner evaluation of the polynomial
    P poly = series[24];
    for (int j = 23; j >= 0; j--) {
        poly = poly*arg+series[j];
    }
    P result = 1.0/(poly*arg);

    if (beyond_unit) {
        result *= shift;
        if (x < 0.0) {
            result = -M_PI/(x*result*sin(M_PI*x));
        }
    }
    return result;
}
87 | + | |
//
//  INPUT:
//      P x    -- argument of Bessel function (x >= 0)
//
//  OUTPUT:
//      P j0   -- Bessel function of 1st kind, 0th order
//      P j1   -- Bessel function of 1st kind, 1st order
//      P y0   -- Bessel function of 2nd kind, 0th order
//      P y1   -- Bessel function of 2nd kind, 1st order
//      P j0p  -- derivative of Bessel function of 1st kind, 0th order
//      P j1p  -- derivative of Bessel function of 1st kind, 1st order
//      P y0p  -- derivative of Bessel function of 2nd kind, 0th order
//      P y1p  -- derivative of Bessel function of 2nd kind, 1st order
//
//  RETURN:
//      int error code: 0 = OK, 1 = error (x < 0; the outputs are not written)
//
//  Power series are used for x <= 12 and asymptotic expansions for x > 12.
//  At x == 0 the singular values are flagged with +/-1e308.
//
template<typename P>
int bessjy01a(P x,P &j0,P &j1,P &y0,P &y1,
    P &j0p,P &j1p,P &y0p,P &y1p)
{
    P x2,r,ec,w0,w1,r0,r1,cs0,cs1;
    P cu,p0,q0,p1,q1,t1,t2;
    int k,kz;

    // Euler-Mascheroni constant, kept local so the routine does not depend on a file-scope macro
    const double euler_gamma = 0.5772156649015329;

    // coefficients of the asymptotic expansions (order 0: a, b; order 1: a1, b1)
    static P a[] = {
        -7.03125e-2,
         0.112152099609375,
        -0.5725014209747314,
         6.074042001273483,
        -1.100171402692467e2,
         3.038090510922384e3,
        -1.188384262567832e5,
         6.252951493434797e6,
        -4.259392165047669e8,
         3.646840080706556e10,
        -3.833534661393944e12,
         4.854014686852901e14,
        -7.286857349377656e16,
         1.279721941975975e19};
    static P b[] = {
         7.32421875e-2,
        -0.2271080017089844,
         1.727727502584457,
        -2.438052969955606e1,
         5.513358961220206e2,
        -1.825775547429318e4,
         8.328593040162893e5,
        -5.006958953198893e7,
         3.836255180230433e9,
        -3.649010818849833e11,
         4.218971570284096e13,
        -5.827244631566907e15,
         9.476288099260110e17,
        -1.792162323051699e20};
    static P a1[] = {
         0.1171875,
        -0.1441955566406250,
         0.6765925884246826,
        -6.883914268109947,
         1.215978918765359e2,
        -3.302272294480852e3,
         1.276412726461746e5,
        -6.656367718817688e6,
         4.502786003050393e8,
        -3.833857520742790e10,
         4.011838599133198e12,
        -5.060568503314727e14,
         7.572616461117958e16,
        -1.326257285320556e19};
    static P b1[] = {
        -0.1025390625,
         0.2775764465332031,
        -1.993531733751297,
         2.724882731126854e1,
        -6.038440767050702e2,
         1.971837591223663e4,
        -8.902978767070678e5,
         5.310411010968522e7,
        -4.043620325107754e9,
         3.827011346598605e11,
        -4.406481417852278e13,
         6.065091351222699e15,
        -9.833883876590679e17,
         1.855045211579828e20};

    if (x < 0.0) return 1;
    if (x == 0.0) {
        j0 = 1.0;
        j1 = 0.0;
        y0 = -1e308;
        y1 = -1e308;
        j0p = 0.0;
        j1p = 0.5;
        y0p = 1e308;
        y1p = 1e308;
        return 0;
    }
    x2 = x*x;
    if (x <= 12.0) {
        // power series for J0; stop once a term no longer changes the sum
        j0 = 1.0;
        r = 1.0;
        for (k=1;k<=30;k++) {
            r *= -0.25*x2/(k*k);
            j0 += r;
            if (fabs(r) < fabs(j0)*1e-15) break;
        }
        // power series for J1
        j1 = 1.0;
        r = 1.0;
        for (k=1;k<=30;k++) {
            r *= -0.25*x2/(k*(k+1));
            j1 += r;
            if (fabs(r) < fabs(j1)*1e-15) break;
        }
        j1 *= 0.5*x;
        // series for Y0 and Y1: logarithmic term plus a harmonic-number weighted sum
        ec = log(0.5*x)+euler_gamma;
        cs0 = 0.0;
        w0 = 0.0;
        r0 = 1.0;
        for (k=1;k<=30;k++) {
            w0 += 1.0/k;
            r0 *= -0.25*x2/(k*k);
            r = r0 * w0;
            cs0 += r;
            if (fabs(r) < fabs(cs0)*1e-15) break;
        }
        y0 = M_2_PI*(ec*j0-cs0);
        cs1 = 1.0;
        w1 = 0.0;
        r1 = 1.0;
        for (k=1;k<=30;k++) {
            w1 += 1.0/k;
            r1 *= -0.25*x2/(k*(k+1));
            r = r1*(2.0*w1+1.0/(k+1));
            cs1 += r;
            if (fabs(r) < fabs(cs1)*1e-15) break;
        }
        y1 = M_2_PI * (ec*j1-1.0/x-0.25*x*cs1);
    }
    else {
        // asymptotic expansions; fewer terms are used as x grows
        if (x >= 50.0) kz = 8;          // Can be changed to 10
        else if (x >= 35.0) kz = 10;    //  "       "        12
        else kz = 12;                   //  "       "        14
        t1 = x-M_PI_4;
        p0 = 1.0;
        q0 = -0.125/x;
        for (k=0;k<kz;k++) {
            p0 += a[k]*pow(x,-2*k-2);
            q0 += b[k]*pow(x,-2*k-3);
        }
        cu = sqrt(M_2_PI/x);
        j0 = cu*(p0*cos(t1)-q0*sin(t1));
        y0 = cu*(p0*sin(t1)+q0*cos(t1));
        t2 = x-0.75*M_PI;
        p1 = 1.0;
        q1 = 0.375/x;
        for (k=0;k<kz;k++) {
            p1 += a1[k]*pow(x,-2*k-2);
            q1 += b1[k]*pow(x,-2*k-3);
        }
        j1 = cu*(p1*cos(t2)-q1*sin(t2));
        y1 = cu*(p1*sin(t2)+q1*cos(t2));
    }
    // derivatives from the order-0 and order-1 values
    j0p = -j1;
    j1p = j0-j1/x;
    y0p = -y1;
    y1p = y0-y1/x;
    return 0;
}
//
//  INPUT:
//      P x    -- argument of Bessel function (x >= 0)
//
//  OUTPUT:
//      P j0   -- Bessel function of 1st kind, 0th order
//      P j1   -- Bessel function of 1st kind, 1st order
//      P y0   -- Bessel function of 2nd kind, 0th order
//      P y1   -- Bessel function of 2nd kind, 1st order
//      P j0p  -- derivative of Bessel function of 1st kind, 0th order
//      P j1p  -- derivative of Bessel function of 1st kind, 1st order
//      P y0p  -- derivative of Bessel function of 2nd kind, 0th order
//      P y1p  -- derivative of Bessel function of 2nd kind, 1st order
//
//  RETURN:
//      int error code: 0 = OK, 1 = error (x < 0; the outputs are not written)
//
//  The functions are evaluated with polynomial approximations: one set
//  in (x/4)^2 for x <= 4 and one set in (4/x)^2 for x > 4.
//
template<typename P>
int bessjy01b(P x,P &j0,P &j1,P &y0,P &y1,
    P &j0p,P &j1p,P &y0p,P &y1p)
{
    if (x < 0.0) return 1;

    // x == 0: singular values are flagged with +/-1e308
    if (x == 0.0) {
        j0 = 1.0;
        j1 = 0.0;
        y0 = -1e308;
        y1 = -1e308;
        j0p = 0.0;
        j1p = 0.5;
        y0p = 1e308;
        y1p = 1e308;
        return 0;
    }

    if(x <= 4.0) {
        const P u = x/4.0;
        const P u2 = u*u;
        j0 = ((((((-0.5014415e-3*u2+0.76771853e-2)*u2-0.0709253492)*u2+
            0.4443584263)*u2-1.7777560599)*u2+3.9999973021)*u2
            -3.9999998721)*u2+1.0;
        j1 = u*(((((((-0.1289769e-3*u2+0.22069155e-2)*u2-0.0236616773)*u2+
            0.1777582922)*u2-0.8888839649)*u2+2.6666660544)*u2-
            3.999999971)*u2+1.9999999998);
        // non-logarithmic part of Y0
        const P rest0 = (((((((-0.567433e-4*u2+0.859977e-3)*u2-0.94855882e-2)*u2+
            0.0772975809)*u2-0.4261737419)*u2+1.4216421221)*u2-
            2.3498519931)*u2+1.0766115157)*u2+0.3674669052;
        y0 = M_2_PI*log(0.5*x)*j0+rest0;
        // non-logarithmic part of Y1 (divided by x below)
        const P rest1 = (((((((0.6535773e-3*u2-0.0108175626)*u2+0.107657607)*u2-
            0.7268945577)*u2+3.1261399273)*u2-7.3980241381)*u2+
            6.8529236342)*u2+0.3932562018)*u2-0.6366197726;
        y1 = M_2_PI*log(0.5*x)*j1+rest1/x;
    }
    else {
        const P u = 4.0/x;
        const P u2 = u*u;
        const P amp = sqrt(M_2_PI/x);
        // order 0: amplitude and phase polynomials
        const P amp0 = ((((-0.9285e-5*u2+0.43506e-4)*u2-0.122226e-3)*u2+
            0.434725e-3)*u2-0.4394275e-2)*u2+0.999999997;
        const P phs0 = u*(((((0.8099e-5*u2-0.35614e-4)*u2+0.85844e-4)*u2-
            0.218024e-3)*u2+0.1144106e-2)*u2-0.031249995);
        const P ang0 = x-M_PI_4;
        j0 = amp*(amp0*cos(ang0)-phs0*sin(ang0));
        y0 = amp*(amp0*sin(ang0)+phs0*cos(ang0));
        // order 1: amplitude and phase polynomials
        const P amp1 = ((((0.10632e-4*u2-0.50363e-4)*u2+0.145575e-3)*u2
            -0.559487e-3)*u2+0.7323931e-2)*u2+1.000000004;
        const P phs1 = u*(((((-0.9173e-5*u2+0.40658e-4)*u2-0.99941e-4)*u2
            +0.266891e-3)*u2-0.1601836e-2)*u2+0.093749994);
        const P ang1 = x-0.75*M_PI;
        j1 = amp*(amp1*cos(ang1)-phs1*sin(ang1));
        y1 = amp*(amp1*sin(ang1)+phs1*cos(ang1));
    }

    // derivatives from the order-0 and order-1 values
    j0p = -j1;
    j1p = j0-j1/x;
    y0p = -y1;
    y1p = y0-y1/x;
    return 0;
}
//
//  Secant search (at most 20 steps) for the order nn at which
//      0.5*log10(6.28*nn) - nn*log10(1.36*|x|/nn) - mp
//  crosses zero. The Bessel routines in this file use the result as
//  the starting order of their backward recurrence.
//
//  INPUT:
//      P   x   -- argument (only |x| is used)
//      int mp  -- offset subtracted from the estimate
//
//  RETURN:
//      the last order produced by the search
//
template<typename P>
int msta1(P x,int mp)
{
    const P mag = fabs(x);

    // two starting orders for the secant iteration
    int older = (int)(1.1*mag)+1;
    P f_older = 0.5*log10(6.28*older)-older*log10(1.36*mag/older)-mp;
    int newer = older+5;
    P f_newer = 0.5*log10(6.28*newer)-newer*log10(1.36*mag/newer)-mp;

    int guess;
    for (int step=0;step<20;step++) {
        guess = (int)(newer-(newer-older)/(1.0-f_older/f_newer));
        if (std::abs(guess-newer) < 1) break;      // the order stopped changing
        const P f_guess = 0.5*log10(6.28*guess)-guess*log10(1.36*mag/guess)-mp;
        older = newer;
        f_older = f_newer;
        newer = guess;
        f_newer = f_guess;
    }
    return guess;
}
//
//  Secant search (at most 20 steps) like msta1, but the target of the
//  search depends on the requested order n:
//      e(n) = 0.5*log10(6.28*n) - n*log10(1.36*|x|/n)
//      target = mp             when e(n) <= mp/2
//      target = mp/2 + e(n)    otherwise
//
//  INPUT:
//      P   x   -- argument (only |x| is used)
//      int n   -- order
//      int mp  -- offset used to build the target
//
//  RETURN:
//      the last order produced by the search, plus 10
//
template<typename P>
int msta2(P x,int n,int mp)
{
    const P mag = fabs(x);
    const P half_mp = 0.5*mp;
    const P e_n = 0.5*log10(6.28*n)-n*log10(1.36*mag/n);

    // choose the target value and the first starting order
    P target;
    int older;
    if (e_n <= half_mp) {
        target = mp;
        older = (int)(1.1*mag);
        if (older < 1) older = 1;
    }
    else {
        target = half_mp+e_n;
        older = n;
    }

    P f_older = 0.5*log10(6.28*older)-older*log10(1.36*mag/older)-target;
    int newer = older+5;
    P f_newer = 0.5*log10(6.28*newer)-newer*log10(1.36*mag/newer)-target;

    int guess;
    for (int step=0;step<20;step++) {
        guess = (int)(newer-(newer-older)/(1.0-f_older/f_newer));
        if (std::abs(guess-newer) < 1) break;      // the order stopped changing
        const P f_guess = 0.5*log10(6.28*guess)-guess*log10(1.36*mag/guess)-target;
        older = newer;
        f_older = f_newer;
        newer = guess;
        f_newer = f_guess;
    }
    return guess+10;
}
//
//  INPUT:
//      P   x     -- argument of Bessel function of 1st and 2nd kind (x >= 0)
//      int n     -- order (n >= 0)
//
//  OUTPUT:
//      int nm    -- highest order actually computed (nm <= n)
//      P jn[]    -- Bessel function of 1st kind, orders from 0 to nm
//      P yn[]    -- Bessel function of 2nd kind, orders from 0 to nm
//      P jnp[]   -- derivative of Bessel function of 1st kind,
//                   orders from 0 to nm
//      P ynp[]   -- derivative of Bessel function of 2nd kind,
//                   orders from 0 to nm
//
//  RETURN:
//      int error code: 0 = OK, 1 = error (x < 0 or n < 0)
//
//  Computes Bessel functions of all order up to 'n' using recurrence
//  relations. If 'nm' < 'n' only 'nm' orders are returned.
//  Every output array must hold at least n+1 elements; no element
//  beyond index n is written.
//
template<typename P>
int bessjyna(int n,P x,int &nm,P *jn,P *yn,
    P *jnp,P *ynp)
{
    P bj0,bj1,f,f0,f1,f2,cs;
    P ord0_j,ord1_j,ord0_y,ord1_y;
    P ord0_jp,ord1_jp,ord0_yp,ord1_yp;
    int i,k,m;

    nm = n;
    if ((x < 0.0) || (n < 0)) return 1;
    if (x < 1e-15) {
        for (i=0;i<=n;i++) {
            jn[i] = 0.0;
            yn[i] = -1e308;
            jnp[i] = 0.0;
            ynp[i] = 1e308;
        }
        jn[0] = 1.0;
        if (n >= 1) jnp[1] = 0.5;       // index 1 only exists when n >= 1
        return 0;
    }

    // Orders 0 and 1 are computed into locals so that a call with n == 0
    // never writes index 1 of the output arrays. x >= 0 is guaranteed
    // here, so the error return of bessjy01a cannot occur.
    bessjy01a(x,ord0_j,ord1_j,ord0_y,ord1_y,ord0_jp,ord1_jp,ord0_yp,ord1_yp);
    jn[0] = ord0_j;
    yn[0] = ord0_y;
    jnp[0] = ord0_jp;
    ynp[0] = ord0_yp;
    if (n >= 1) {
        jn[1] = ord1_j;
        yn[1] = ord1_y;
        jnp[1] = ord1_jp;
        ynp[1] = ord1_yp;
    }
    if (n < 2) return 0;

    bj0 = ord0_j;
    bj1 = ord1_j;
    // The cast must apply to the product: (int)0.9*x evaluates to 0 and
    // would disable the forward recurrence entirely.
    if (n < (int)(0.9*x)) {
        // forward recurrence
        for (k=2;k<=n;k++) {
            jn[k] = 2.0*(k-1.0)*bj1/x-bj0;
            bj0 = bj1;
            bj1 = jn[k];
        }
    }
    else {
        // backward recurrence from the starting order given by msta1/msta2,
        // then rescale against the accurate order-0 or order-1 value
        m = msta1(x,200);
        if (m < n) nm = m;
        else m = msta2(x,n,15);
        f2 = 0.0;
        f1 = 1.0e-100;
        for (k=m;k>=0;k--) {
            f = 2.0*(k+1.0)/x*f1-f2;
            if (k <= nm) jn[k] = f;
            f2 = f1;
            f1 = f;
        }
        if (fabs(bj0) > fabs(bj1)) cs = bj0/f;
        else cs = bj1/f2;
        for (k=0;k<=nm;k++) {
            jn[k] *= cs;
        }
    }
    for (k=2;k<=nm;k++) {
        jnp[k] = jn[k-1]-k*jn[k]/x;
    }
    // the functions of the 2nd kind always use forward recurrence
    f0 = yn[0];
    f1 = yn[1];
    for (k=2;k<=nm;k++) {
        f = 2.0*(k-1.0)*f1/x-f0;
        yn[k] = f;
        f0 = f1;
        f1 = f;
    }
    for (k=2;k<=nm;k++) {
        ynp[k] = yn[k-1]-k*yn[k]/x;
    }
    return 0;
}
//
//  Same input and output conventions as bessjyna. Different recurrence
//  relations are used: a normalized backward recurrence when x <= 300
//  or n > (int)(0.9*x), and 4-term asymptotic expansions followed by
//  forward recurrence otherwise.
//
//  NOTE: index 1 of every output array is written even when n == 0,
//  and 'nm' is set to 1 in that case for x <= 300, so each array must
//  hold at least 2 elements.
//
//  RETURN:
//      int error code: 0 = OK, 1 = error (x < 0 or n < 0)
//
template<typename P>
int bessjynb(int n,P x,int &nm,P *jn,P *yn,
    P *jnp,P *ynp)
{
    // coefficients of the asymptotic expansions (order 0, then order 1)
    static P amp0[] = {
        -0.7031250000000000e-1,
         0.1121520996093750,
        -0.5725014209747314,
         6.074042001273483};
    static P phs0[] = {
         0.7324218750000000e-1,
        -0.2271080017089844,
         1.727727502584457,
        -2.438052969955606e1};
    static P amp1[] = {
         0.1171875,
        -0.1441955566406250,
         0.6765925884246826,
        -6.883914268109947};
    static P phs1[] = {
        -0.1025390625,
         0.2775764465332031,
        -1.993531733751297,
         2.724882731126854e1};

    int order;
    P y_prev,y_cur;         // Y of the two most recent orders

    nm = n;
    if ((x < 0.0) || (n < 0)) return 1;
    if (x < 1e-15) {
        for (order=0;order<=n;order++) {
            jn[order] = 0.0;
            yn[order] = -1e308;
            jnp[order] = 0.0;
            ynp[order] = 1e308;
        }
        jn[0] = 1.0;
        jnp[1] = 0.5;
        return 0;
    }
    if (x <= 300.0 || n > (int)(0.9*x)) {
        if (n == 0) nm = 1;
        int top = msta1(x,200);
        if (top < nm) nm = top;
        else top = msta2(x,nm,15);

        // backward recurrence; the sums collect the terms needed for the
        // normalization (even_sum) and for Y0 (su) and Y1 (sv)
        P even_sum = 0.0;
        P su = 0.0;
        P sv = 0.0;
        P ahead = 0.0;
        P here = 1.0e-100;
        P f;
        for (order = top;order>=0;order--) {
            f = 2.0*(order+1.0)/x*here - ahead;
            if (order <= nm) jn[order] = f;
            const bool is_even = (order % 2 == 0);
            if (is_even && (order != 0)) {
                even_sum += 2.0*f;
                // sign alternates with order/2
                su += (-1)*((order & 2)-1)*f/(P)order;
            }
            else if (order > 1) {
                sv += (-1)*((order & 2)-1)*(P)order*f/(order*order-1.0);
            }
            ahead = here;
            here = f;
        }
        const P norm = even_sum+f;
        for (order=0;order<=nm;order++) {
            jn[order] /= norm;
        }
        const P log_term = log(0.5*x) +0.5772156649015329;
        y_prev = M_2_PI*(log_term*jn[0]-4.0*su/norm);
        yn[0] = y_prev;
        y_cur = M_2_PI*((log_term-1.0)*jn[1]-jn[0]/x-4.0*sv/norm);
        yn[1] = y_cur;
    }
    else {
        // asymptotic expansions for orders 0 and 1
        const P ang0 = x-M_PI_4;
        P p0 = 1.0;
        P q0 = -0.125/x;
        for (order=0;order<4;order++) {
            p0 += amp0[order]*pow(x,-2*order-2);
            q0 += phs0[order]*pow(x,-2*order-3);
        }
        const P scale = sqrt(M_2_PI/x);
        P j_prev = scale*(p0*cos(ang0)-q0*sin(ang0));
        y_prev = scale*(p0*sin(ang0)+q0*cos(ang0));
        jn[0] = j_prev;
        yn[0] = y_prev;
        const P ang1 = x-0.75*M_PI;
        P p1 = 1.0;
        P q1 = 0.375/x;
        for (order=0;order<4;order++) {
            p1 += amp1[order]*pow(x,-2*order-2);
            q1 += phs1[order]*pow(x,-2*order-3);
        }
        P j_cur = scale*(p1*cos(ang1)-q1*sin(ang1));
        y_cur = scale*(p1*sin(ang1)+q1*cos(ang1));
        jn[1] = j_cur;
        yn[1] = y_cur;
        // forward recurrence for the remaining orders of J
        for (order=2;order<=nm;order++) {
            const P j_next = 2.0*(order-1.0)*j_cur/x-j_prev;
            jn[order] = j_next;
            j_prev = j_cur;
            j_cur = j_next;
        }
    }

    // derivatives of J
    jnp[0] = -jn[1];
    for (order=1;order<=nm;order++) {
        jnp[order] = jn[order-1]-order*jn[order]/x;
    }
    // forward recurrence for the remaining orders of Y
    for (order=2;order<=nm;order++) {
        const P y_next = 2.0*(order-1.0)*y_cur/x-y_prev;
        yn[order] = y_next;
        y_prev = y_cur;
        y_cur = y_next;
    }
    // derivatives of Y
    ynp[0] = -yn[1];
    for (order=1;order<=nm;order++) {
        ynp[order] = yn[order-1]-order*yn[order]/x;
    }
    return 0;

}
583 | + | |
584 | +// The following routine computes Bessel Jv(x) and Yv(x) for | |
585 | +// arbitrary positive order (v). For negative order, use: | |
586 | +// | |
587 | +// J-v(x) = Jv(x)cos(v pi) - Yv(x)sin(v pi) | |
588 | +// Y-v(x) = Jv(x)sin(v pi) + Yv(x)cos(v pi) | |
589 | +// | |
590 | +template<typename P> | |
591 | +int bessjyv(P v,P x,P &vm,P *jv,P *yv, | |
592 | + P *djv,P *dyv) | |
593 | +{ | |
594 | + P v0,vl,vg,vv,a,a0,r,x2,bjv0,bjv1,bjvl,f,f0,f1,f2; | |
595 | + P r0,r1,ck,cs,cs0,cs1,sk,qx,px,byv0,byv1,rp,xk,rq; | |
596 | + P b,ec,w0,w1,bju0,bju1,pv0,pv1,byvk; | |
597 | + int j,k,l,m,n,kz; | |
598 | + | |
599 | + x2 = x*x; | |
600 | + n = (int)v; | |
601 | + v0 = v-n; | |
602 | + if ((x < 0.0) || (v < 0.0)) return 1; | |
603 | + if (x < 1e-15) { | |
604 | + for (k=0;k<=n;k++) { | |
605 | + jv[k] = 0.0; | |
606 | + yv[k] = -1e308; | |
607 | + djv[k] = 0.0; | |
608 | + dyv[k] = 1e308; | |
609 | + if (v0 == 0.0) { | |
610 | + jv[0] = 1.0; | |
611 | + djv[1] = 0.5; | |
612 | + } | |
613 | + else djv[0] = 1e308; | |
614 | + } | |
615 | + vm = v; | |
616 | + return 0; | |
617 | + } | |
618 | + if (x <= 12.0) { | |
619 | + for (l=0;l<2;l++) { | |
620 | + vl = v0 + l; | |
621 | + bjvl = 1.0; | |
622 | + r = 1.0; | |
623 | + for (k=1;k<=40;k++) { | |
624 | + r *= -0.25*x2/(k*(k+vl)); | |
625 | + bjvl += r; | |
626 | + if (fabs(r) < fabs(bjvl)*1e-15) break; | |
627 | + } | |
628 | + vg = 1.0 + vl; | |
629 | + a = pow(0.5*x,vl)/gamma(vg); | |
630 | + if (l == 0) bjv0 = bjvl*a; | |
631 | + else bjv1 = bjvl*a; | |
632 | + } | |
633 | + } | |
634 | + else { | |
635 | + if (x >= 50.0) kz = 8; | |
636 | + else if (x >= 35.0) kz = 10; | |
637 | + else kz = 11; | |
638 | + for (j=0;j<2;j++) { | |
639 | + vv = 4.0*(j+v0)*(j+v0); | |
640 | + px = 1.0; | |
641 | + rp = 1.0; | |
642 | + for (k=1;k<=kz;k++) { | |
643 | + rp *= (-0.78125e-2)*(vv-pow(4.0*k-3.0,2.0))* | |
644 | + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*x2); | |
645 | + px += rp; | |
646 | + } | |
647 | + qx = 1.0; | |
648 | + rq = 1.0; | |
649 | + for (k=1;k<=kz;k++) { | |
650 | + rq *= (-0.78125e-2)*(vv-pow(4.0*k-1.0,2.0))* | |
651 | + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*x2); | |
652 | + qx += rq; | |
653 | + } | |
654 | + qx *= 0.125*(vv-1.0)/x; | |
655 | + xk = x-(0.5*(j+v0)+0.25)*M_PI; | |
656 | + a0 = sqrt(M_2_PI/x); | |
657 | + ck = cos(xk); | |
658 | + sk = sin(xk); | |
659 | + | |
660 | + if (j == 0) { | |
661 | + bjv0 = a0*(px*ck-qx*sk); | |
662 | + byv0 = a0*(px*sk+qx*ck); | |
663 | + } | |
664 | + else if (j == 1) { | |
665 | + bjv1 = a0*(px*ck-qx*sk); | |
666 | + byv1 = a0*(px*sk+qx*ck); | |
667 | + } | |
668 | + } | |
669 | + } | |
670 | + jv[0] = bjv0; | |
671 | + jv[1] = bjv1; | |
672 | + djv[0] = v0*jv[0]/x-jv[1]; | |
673 | + djv[1] = -(1.0+v0)*jv[1]/x+jv[0]; | |
674 | + if ((n >= 2) && (n <= (int)(0.9*x))) { | |
675 | + f0 = bjv0; | |
676 | + f1 = bjv1; | |
677 | + for (k=2;k<=n;k++) { | |
678 | + f = 2.0*(k+v0-1.0)*f1/x-f0; | |
679 | + jv[k] = f; | |
680 | + f0 = f1; | |
681 | + f1 = f; | |
682 | + } | |
683 | + } | |
684 | + else if (n >= 2) { | |
685 | + m = msta1(x,200); | |
686 | + if (m < n) n = m; | |
687 | + else m = msta2(x,n,15); | |
688 | + f2 = 0.0; | |
689 | + f1 = 1.0e-100; | |
690 | + for (k=m;k>=0;k--) { | |
691 | + f = 2.0*(v0+k+1.0)*f1/x-f2; | |
692 | + if (k <= n) jv[k] = f; | |
693 | + f2 = f1; | |
694 | + f1 = f; | |
695 | + } | |
696 | + if (fabs(bjv0) > fabs(bjv1)) cs = bjv0/f; | |
697 | + else cs = bjv1/f2; | |
698 | + for (k=0;k<=n;k++) { | |
699 | + jv[k] *= cs; | |
700 | + } | |
701 | + } | |
702 | + for (k=2;k<=n;k++) { | |
703 | + djv[k] = -(k+v0)*jv[k]/x+jv[k-1]; | |
704 | + } | |
705 | + if (x <= 12.0) { | |
706 | + if (v0 != 0.0) { | |
707 | + for (l=0;l<2;l++) { | |
708 | + vl = v0 +l; | |
709 | + bjvl = 1.0; | |
710 | + r = 1.0; | |
711 | + for (k=1;k<=40;k++) { | |
712 | + r *= -0.25*x2/(k*(k-vl)); | |
713 | + bjvl += r; | |
714 | + if (fabs(r) < fabs(bjvl)*1e-15) break; | |
715 | + } | |
716 | + vg = 1.0-vl; | |
717 | + b = pow(2.0/x,vl)/gamma(vg); | |
718 | + if (l == 0) bju0 = bjvl*b; | |
719 | + else bju1 = bjvl*b; | |
720 | + } | |
721 | + pv0 = M_PI*v0; | |
722 | + pv1 = M_PI*(1.0+v0); | |
723 | + byv0 = (bjv0*cos(pv0)-bju0)/sin(pv0); | |
724 | + byv1 = (bjv1*cos(pv1)-bju1)/sin(pv1); | |
725 | + } | |
726 | + else { | |
727 | + ec = log(0.5*x)+el; | |
728 | + cs0 = 0.0; | |
729 | + w0 = 0.0; | |
730 | + r0 = 1.0; | |
731 | + for (k=1;k<=30;k++) { | |
732 | + w0 += 1.0/k; | |
733 | + r0 *= -0.25*x2/(k*k); | |
734 | + cs0 += r0*w0; | |
735 | + } | |
736 | + byv0 = M_2_PI*(ec*bjv0-cs0); | |
737 | + cs1 = 1.0; | |
738 | + w1 = 0.0; | |
739 | + r1 = 1.0; | |
740 | + for (k=1;k<=30;k++) { | |
741 | + w1 += 1.0/k; | |
742 | + r1 *= -0.25*x2/(k*(k+1)); | |
743 | + cs1 += r1*(2.0*w1+1.0/(k+1.0)); | |
744 | + } | |
745 | + byv1 = M_2_PI*(ec*bjv1-1.0/x-0.25*x*cs1); | |
746 | + } | |
747 | + } | |
748 | + yv[0] = byv0; | |
749 | + yv[1] = byv1; | |
750 | + for (k=2;k<=n;k++) { | |
751 | + byvk = 2.0*(v0+k-1.0)*byv1/x-byv0; | |
752 | + yv[k] = byvk; | |
753 | + byv0 = byv1; | |
754 | + byv1 = byvk; | |
755 | + } | |
756 | + dyv[0] = v0*yv[0]/x-yv[1]; | |
757 | + for (k=1;k<=n;k++) { | |
758 | + dyv[k] = -(k+v0)*yv[k]/x+yv[k-1]; | |
759 | + } | |
760 | + vm = n + v0; | |
761 | + return 0; | |
762 | +} | |
763 | + | |
764 | +template<typename P> | |
765 | +int bessjyv_sph(int v, P z, P &vm, P* cjv, | |
766 | + P* cyv, P* cjvp, P* cyvp) | |
767 | +{ | |
768 | + //first, compute the bessel functions of fractional order | |
769 | + bessjyv<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
770 | + | |
771 | + //iterate through each and scale | |
772 | + for(int n = 0; n<=v; n++) | |
773 | + { | |
774 | + | |
775 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | |
776 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | |
777 | + | |
778 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | |
779 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | |
780 | + } | |
781 | + | |
782 | + return 0; | |
783 | + | |
784 | +} | |
785 | + | |
786 | +template<typename P> | |
787 | +int cbessjy01(complex<P> z,complex<P> &cj0,complex<P> &cj1, | |
788 | + complex<P> &cy0,complex<P> &cy1,complex<P> &cj0p, | |
789 | + complex<P> &cj1p,complex<P> &cy0p,complex<P> &cy1p) | |
790 | +{ | |
791 | + complex<P> z1,z2,cr,cp,cs,cp0,cq0,cp1,cq1,ct1,ct2,cu; | |
792 | + P a0,w0,w1; | |
793 | + int k,kz; | |
794 | + | |
795 | + static P a[] = { | |
796 | + -7.03125e-2, | |
797 | + 0.112152099609375, | |
798 | + -0.5725014209747314, | |
799 | + 6.074042001273483, | |
800 | + -1.100171402692467e2, | |
801 | + 3.038090510922384e3, | |
802 | + -1.188384262567832e5, | |
803 | + 6.252951493434797e6, | |
804 | + -4.259392165047669e8, | |
805 | + 3.646840080706556e10, | |
806 | + -3.833534661393944e12, | |
807 | + 4.854014686852901e14, | |
808 | + -7.286857349377656e16, | |
809 | + 1.279721941975975e19}; | |
810 | + static P b[] = { | |
811 | + 7.32421875e-2, | |
812 | + -0.2271080017089844, | |
813 | + 1.727727502584457, | |
814 | + -2.438052969955606e1, | |
815 | + 5.513358961220206e2, | |
816 | + -1.825775547429318e4, | |
817 | + 8.328593040162893e5, | |
818 | + -5.006958953198893e7, | |
819 | + 3.836255180230433e9, | |
820 | + -3.649010818849833e11, | |
821 | + 4.218971570284096e13, | |
822 | + -5.827244631566907e15, | |
823 | + 9.476288099260110e17, | |
824 | + -1.792162323051699e20}; | |
825 | + static P a1[] = { | |
826 | + 0.1171875, | |
827 | + -0.1441955566406250, | |
828 | + 0.6765925884246826, | |
829 | + -6.883914268109947, | |
830 | + 1.215978918765359e2, | |
831 | + -3.302272294480852e3, | |
832 | + 1.276412726461746e5, | |
833 | + -6.656367718817688e6, | |
834 | + 4.502786003050393e8, | |
835 | + -3.833857520742790e10, | |
836 | + 4.011838599133198e12, | |
837 | + -5.060568503314727e14, | |
838 | + 7.572616461117958e16, | |
839 | + -1.326257285320556e19}; | |
840 | + static P b1[] = { | |
841 | + -0.1025390625, | |
842 | + 0.2775764465332031, | |
843 | + -1.993531733751297, | |
844 | + 2.724882731126854e1, | |
845 | + -6.038440767050702e2, | |
846 | + 1.971837591223663e4, | |
847 | + -8.902978767070678e5, | |
848 | + 5.310411010968522e7, | |
849 | + -4.043620325107754e9, | |
850 | + 3.827011346598605e11, | |
851 | + -4.406481417852278e13, | |
852 | + 6.065091351222699e15, | |
853 | + -9.833883876590679e17, | |
854 | + 1.855045211579828e20}; | |
855 | + | |
856 | + a0 = abs(z); | |
857 | + z2 = z*z; | |
858 | + z1 = z; | |
859 | + if (a0 == 0.0) { | |
860 | + cj0 = cone; | |
861 | + cj1 = czero; | |
862 | + cy0 = complex<P>(-1e308,0); | |
863 | + cy1 = complex<P>(-1e308,0); | |
864 | + cj0p = czero; | |
865 | + cj1p = complex<P>(0.5,0.0); | |
866 | + cy0p = complex<P>(1e308,0); | |
867 | + cy1p = complex<P>(1e308,0); | |
868 | + return 0; | |
869 | + } | |
870 | + if (real(z) < 0.0) z1 = -z; | |
871 | + if (a0 <= 12.0) { | |
872 | + cj0 = cone; | |
873 | + cr = cone; | |
874 | + for (k=1;k<=40;k++) { | |
875 | + cr *= -0.25*z2/(P)(k*k); | |
876 | + cj0 += cr; | |
877 | + if (abs(cr) < abs(cj0)*eps) break; | |
878 | + } | |
879 | + cj1 = cone; | |
880 | + cr = cone; | |
881 | + for (k=1;k<=40;k++) { | |
882 | + cr *= -0.25*z2/(k*(k+1.0)); | |
883 | + cj1 += cr; | |
884 | + if (abs(cr) < abs(cj1)*eps) break; | |
885 | + } | |
886 | + cj1 *= 0.5*z1; | |
887 | + w0 = 0.0; | |
888 | + cr = cone; | |
889 | + cs = czero; | |
890 | + for (k=1;k<=40;k++) { | |
891 | + w0 += 1.0/k; | |
892 | + cr *= -0.25*z2/(P)(k*k); | |
893 | + cp = cr*w0; | |
894 | + cs += cp; | |
895 | + if (abs(cp) < abs(cs)*eps) break; | |
896 | + } | |
897 | + cy0 = M_2_PI*((log(0.5*z1)+el)*cj0-cs); | |
898 | + w1 = 0.0; | |
899 | + cr = cone; | |
900 | + cs = cone; | |
901 | + for (k=1;k<=40;k++) { | |
902 | + w1 += 1.0/k; | |
903 | + cr *= -0.25*z2/(k*(k+1.0)); | |
904 | + cp = cr*(2.0*w1+1.0/(k+1.0)); | |
905 | + cs += cp; | |
906 | + if (abs(cp) < abs(cs)*eps) break; | |
907 | + } | |
908 | + cy1 = M_2_PI*((log(0.5*z1)+el)*cj1-1.0/z1-0.25*z1*cs); | |
909 | + } | |
910 | + else { | |
911 | + if (a0 >= 50.0) kz = 8; // can be changed to 10 | |
912 | + else if (a0 >= 35.0) kz = 10; // " " " 12 | |
913 | + else kz = 12; // " " " 14 | |
914 | + ct1 = z1 - M_PI_4; | |
915 | + cp0 = cone; | |
916 | + for (k=0;k<kz;k++) { | |
917 | + cp0 += a[k]*pow(z1,-2.0*k-2.0); | |
918 | + } | |
919 | + cq0 = -0.125/z1; | |
920 | + for (k=0;k<kz;k++) { | |
921 | + cq0 += b[k]*pow(z1,-2.0*k-3.0); | |
922 | + } | |
923 | + cu = sqrt(M_2_PI/z1); | |
924 | + cj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); | |
925 | + cy0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); | |
926 | + ct2 = z1 - 0.75*M_PI; | |
927 | + cp1 = cone; | |
928 | + for (k=0;k<kz;k++) { | |
929 | + cp1 += a1[k]*pow(z1,-2.0*k-2.0); | |
930 | + } | |
931 | + cq1 = 0.375/z1; | |
932 | + for (k=0;k<kz;k++) { | |
933 | + cq1 += b1[k]*pow(z1,-2.0*k-3.0); | |
934 | + } | |
935 | + cj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); | |
936 | + cy1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); | |
937 | + } | |
938 | + if (real(z) < 0.0) { | |
939 | + if (imag(z) < 0.0) { | |
940 | + cy0 -= 2.0*cii*cj0; | |
941 | + cy1 = -(cy1-2.0*cii*cj1); | |
942 | + } | |
943 | + else if (imag(z) > 0.0) { | |
944 | + cy0 += 2.0*cii*cj0; | |
945 | + cy1 = -(cy1+2.0*cii*cj1); | |
946 | + } | |
947 | + cj1 = -cj1; | |
948 | + } | |
949 | + cj0p = -cj1; | |
950 | + cj1p = cj0-cj1/z; | |
951 | + cy0p = -cy1; | |
952 | + cy1p = cy0-cy1/z; | |
953 | + return 0; | |
954 | +} | |
955 | + | |
956 | +template<typename P> | |
957 | +int cbessjyna(int n,complex<P> z,int &nm,complex<P> *cj, | |
958 | + complex<P> *cy,complex<P> *cjp,complex<P> *cyp) | |
959 | +{ | |
960 | + complex<P> cbj0,cbj1,cby0,cby1,cj0,cjk,cj1,cf,cf1,cf2; | |
961 | + complex<P> cs,cg0,cg1,cyk,cyl1,cyl2,cylk,cp11,cp12,cp21,cp22; | |
962 | + complex<P> ch0,ch1,ch2; | |
963 | + P a0,yak,ya1,ya0,wa; | |
964 | + int m,k,lb,lb0; | |
965 | + | |
966 | + if (n < 0) return 1; | |
967 | + a0 = abs(z); | |
968 | + nm = n; | |
969 | + if (a0 < 1.0e-100) { | |
970 | + for (k=0;k<=n;k++) { | |
971 | + cj[k] = czero; | |
972 | + cy[k] = complex<P> (-1e308,0); | |
973 | + cjp[k] = czero; | |
974 | + cyp[k] = complex<P>(1e308,0); | |
975 | + } | |
976 | + cj[0] = cone; | |
977 | + cjp[1] = complex<P>(0.5,0.0); | |
978 | + return 0; | |
979 | + } | |
980 | + cbessjy01(z,cj[0],cj[1],cy[0],cy[1],cjp[0],cjp[1],cyp[0],cyp[1]); | |
981 | + cbj0 = cj[0]; | |
982 | + cbj1 = cj[1]; | |
983 | + cby0 = cy[0]; | |
984 | + cby1 = cy[1]; | |
985 | + if (n <= 1) return 0; | |
986 | + if (n < (int)0.25*a0) { | |
987 | + cj0 = cbj0; | |
988 | + cj1 = cbj1; | |
989 | + for (k=2;k<=n;k++) { | |
990 | + cjk = 2.0*(k-1.0)*cj1/z-cj0; | |
991 | + cj[k] = cjk; | |
992 | + cj0 = cj1; | |
993 | + cj1 = cjk; | |
994 | + } | |
995 | + } | |
996 | + else { | |
997 | + m = msta1(a0,200); | |
998 | + if (m < n) nm = m; | |
999 | + else m = msta2(a0,n,15); | |
1000 | + cf2 = czero; | |
1001 | + cf1 = complex<P> (1.0e-100,0.0); | |
1002 | + for (k=m;k>=0;k--) { | |
1003 | + cf = 2.0*(k+1.0)*cf1/z-cf2; | |
1004 | + if (k <=nm) cj[k] = cf; | |
1005 | + cf2 = cf1; | |
1006 | + cf1 = cf; | |
1007 | + } | |
1008 | + if (abs(cbj0) > abs(cbj1)) cs = cbj0/cf; | |
1009 | + else cs = cbj1/cf2; | |
1010 | + for (k=0;k<=nm;k++) { | |
1011 | + cj[k] *= cs; | |
1012 | + } | |
1013 | + } | |
1014 | + for (k=2;k<=nm;k++) { | |
1015 | + cjp[k] = cj[k-1]-(P)k*cj[k]/z; | |
1016 | + } | |
1017 | + ya0 = abs(cby0); | |
1018 | + lb = 0; | |
1019 | + cg0 = cby0; | |
1020 | + cg1 = cby1; | |
1021 | + for (k=2;k<=nm;k++) { | |
1022 | + cyk = 2.0*(k-1.0)*cg1/z-cg0; | |
1023 | + yak = abs(cyk); | |
1024 | + ya1 = abs(cg0); | |
1025 | + if ((yak < ya0) && (yak < ya1)) lb = k; | |
1026 | + cy[k] = cyk; | |
1027 | + cg0 = cg1; | |
1028 | + cg1 = cyk; | |
1029 | + } | |
1030 | + lb0 = 0; | |
1031 | + if ((lb > 4) && (imag(z) != 0.0)) { | |
1032 | + while (lb != lb0) { | |
1033 | + ch2 = cone; | |
1034 | + ch1 = czero; | |
1035 | + lb0 = lb; | |
1036 | + for (k=lb;k>=1;k--) { | |
1037 | + ch0 = 2.0*k*ch1/z-ch2; | |
1038 | + ch2 = ch1; | |
1039 | + ch1 = ch0; | |
1040 | + } | |
1041 | + cp12 = ch0; | |
1042 | + cp22 = ch2; | |
1043 | + ch2 = czero; | |
1044 | + ch1 = cone; | |
1045 | + for (k=lb;k>=1;k--) { | |
1046 | + ch0 = 2.0*k*ch1/z-ch2; | |
1047 | + ch2 = ch1; | |
1048 | + ch1 = ch0; | |
1049 | + } | |
1050 | + cp11 = ch0; | |
1051 | + cp21 = ch2; | |
1052 | + if (lb == nm) | |
1053 | + cj[lb+1] = 2.0*lb*cj[lb]/z-cj[lb-1]; | |
1054 | + if (abs(cj[0]) > abs(cj[1])) { | |
1055 | + cy[lb+1] = (cj[lb+1]*cby0-2.0*cp11/(M_PI*z))/cj[0]; | |
1056 | + cy[lb] = (cj[lb]*cby0+2.0*cp12/(M_PI*z))/cj[0]; | |
1057 | + } | |
1058 | + else { | |
1059 | + cy[lb+1] = (cj[lb+1]*cby1-2.0*cp21/(M_PI*z))/cj[1]; | |
1060 | + cy[lb] = (cj[lb]*cby1+2.0*cp22/(M_PI*z))/cj[1]; | |
1061 | + } | |
1062 | + cyl2 = cy[lb+1]; | |
1063 | + cyl1 = cy[lb]; | |
1064 | + for (k=lb-1;k>=0;k--) { | |
1065 | + cylk = 2.0*(k+1.0)*cyl1/z-cyl2; | |
1066 | + cy[k] = cylk; | |
1067 | + cyl2 = cyl1; | |
1068 | + cyl1 = cylk; | |
1069 | + } | |
1070 | + cyl1 = cy[lb]; | |
1071 | + cyl2 = cy[lb+1]; | |
1072 | + for (k=lb+1;k<n;k++) { | |
1073 | + cylk = 2.0*k*cyl2/z-cyl1; | |
1074 | + cy[k+1] = cylk; | |
1075 | + cyl1 = cyl2; | |
1076 | + cyl2 = cylk; | |
1077 | + } | |
1078 | + for (k=2;k<=nm;k++) { | |
1079 | + wa = abs(cy[k]); | |
1080 | + if (wa < abs(cy[k-1])) lb = k; | |
1081 | + } | |
1082 | + } | |
1083 | + } | |
1084 | + for (k=2;k<=nm;k++) { | |
1085 | + cyp[k] = cy[k-1]-(P)k*cy[k]/z; | |
1086 | + } | |
1087 | + return 0; | |
1088 | +} | |
1089 | + | |
1090 | +template<typename P> | |
1091 | +int cbessjynb(int n,complex<P> z,int &nm,complex<P> *cj, | |
1092 | + complex<P> *cy,complex<P> *cjp,complex<P> *cyp) | |
1093 | +{ | |
1094 | + complex<P> cf,cf0,cf1,cf2,cbs,csu,csv,cs0,ce; | |
1095 | + complex<P> ct1,cp0,cq0,cp1,cq1,cu,cbj0,cby0,cbj1,cby1; | |
1096 | + complex<P> cyy,cbjk,ct2; | |
1097 | + P a0,y0; | |
1098 | + int k,m; | |
1099 | + static P a[] = { | |
1100 | + -0.7031250000000000e-1, | |
1101 | + 0.1121520996093750, | |
1102 | + -0.5725014209747314, | |
1103 | + 6.074042001273483}; | |
1104 | + static P b[] = { | |
1105 | + 0.7324218750000000e-1, | |
1106 | + -0.2271080017089844, | |
1107 | + 1.727727502584457, | |
1108 | + -2.438052969955606e1}; | |
1109 | + static P a1[] = { | |
1110 | + 0.1171875, | |
1111 | + -0.1441955566406250, | |
1112 | + 0.6765925884246826, | |
1113 | + -6.883914268109947}; | |
1114 | + static P b1[] = { | |
1115 | + -0.1025390625, | |
1116 | + 0.2775764465332031, | |
1117 | + -1.993531733751297, | |
1118 | + 2.724882731126854e1}; | |
1119 | + | |
1120 | + y0 = abs(imag(z)); | |
1121 | + a0 = abs(z); | |
1122 | + nm = n; | |
1123 | + if (a0 < 1.0e-100) { | |
1124 | + for (k=0;k<=n;k++) { | |
1125 | + cj[k] = czero; | |
1126 | + cy[k] = complex<P> (-1e308,0); | |
1127 | + cjp[k] = czero; | |
1128 | + cyp[k] = complex<P>(1e308,0); | |
1129 | + } | |
1130 | + cj[0] = cone; | |
1131 | + cjp[1] = complex<P>(0.5,0.0); | |
1132 | + return 0; | |
1133 | + } | |
1134 | + if ((a0 <= 300.0) || (n > (int)(0.25*a0))) { | |
1135 | + if (n == 0) nm = 1; | |
1136 | + m = msta1(a0,200); | |
1137 | + if (m < nm) nm = m; | |
1138 | + else m = msta2(a0,nm,15); | |
1139 | + cbs = czero; | |
1140 | + csu = czero; | |
1141 | + csv = czero; | |
1142 | + cf2 = czero; | |
1143 | + cf1 = complex<P> (1.0e-100,0.0); | |
1144 | + for (k=m;k>=0;k--) { | |
1145 | + cf = 2.0*(k+1.0)*cf1/z-cf2; | |
1146 | + if (k <= nm) cj[k] = cf; | |
1147 | + if (((k & 1) == 0) && (k != 0)) { | |
1148 | + if (y0 <= 1.0) { | |
1149 | + cbs += 2.0*cf; | |
1150 | + } | |
1151 | + else { | |
1152 | + cbs += (-1)*((k & 2)-1)*2.0*cf; | |
1153 | + } | |
1154 | + csu += (P)((-1)*((k & 2)-1))*cf/(P)k; | |
1155 | + } | |
1156 | + else if (k > 1) { | |
1157 | + csv += (P)((-1)*((k & 2)-1)*k)*cf/(P)(k*k-1.0); | |
1158 | + } | |
1159 | + cf2 = cf1; | |
1160 | + cf1 = cf; | |
1161 | + } | |
1162 | + if (y0 <= 1.0) cs0 = cbs+cf; | |
1163 | + else cs0 = (cbs+cf)/cos(z); | |
1164 | + for (k=0;k<=nm;k++) { | |
1165 | + cj[k] /= cs0; | |
1166 | + } | |
1167 | + ce = log(0.5*z)+el; | |
1168 | + cy[0] = M_2_PI*(ce*cj[0]-4.0*csu/cs0); | |
1169 | + cy[1] = M_2_PI*(-cj[0]/z+(ce-1.0)*cj[1]-4.0*csv/cs0); | |
1170 | + } | |
1171 | + else { | |
1172 | + ct1 = z-M_PI_4; | |
1173 | + cp0 = cone; | |
1174 | + for (k=0;k<4;k++) { | |
1175 | + cp0 += a[k]*pow(z,-2.0*k-2.0); | |
1176 | + } | |
1177 | + cq0 = -0.125/z; | |
1178 | + for (k=0;k<4;k++) { | |
1179 | + cq0 += b[k] *pow(z,-2.0*k-3.0); | |
1180 | + } | |
1181 | + cu = sqrt(M_2_PI/z); | |
1182 | + cbj0 = cu*(cp0*cos(ct1)-cq0*sin(ct1)); | |
1183 | + cby0 = cu*(cp0*sin(ct1)+cq0*cos(ct1)); | |
1184 | + cj[0] = cbj0; | |
1185 | + cy[0] = cby0; | |
1186 | + ct2 = z-0.75*M_PI; | |
1187 | + cp1 = cone; | |
1188 | + for (k=0;k<4;k++) { | |
1189 | + cp1 += a1[k]*pow(z,-2.0*k-2.0); | |
1190 | + } | |
1191 | + cq1 = 0.375/z; | |
1192 | + for (k=0;k<4;k++) { | |
1193 | + cq1 += b1[k]*pow(z,-2.0*k-3.0); | |
1194 | + } | |
1195 | + cbj1 = cu*(cp1*cos(ct2)-cq1*sin(ct2)); | |
1196 | + cby1 = cu*(cp1*sin(ct2)+cq1*cos(ct2)); | |
1197 | + cj[1] = cbj1; | |
1198 | + cy[1] = cby1; | |
1199 | + for (k=2;k<=n;k++) { | |
1200 | + cbjk = 2.0*(k-1.0)*cbj1/z-cbj0; | |
1201 | + cj[k] = cbjk; | |
1202 | + cbj0 = cbj1; | |
1203 | + cbj1 = cbjk; | |
1204 | + } | |
1205 | + } | |
1206 | + cjp[0] = -cj[1]; | |
1207 | + for (k=1;k<=nm;k++) { | |
1208 | + cjp[k] = cj[k-1]-(P)k*cj[k]/z; | |
1209 | + } | |
1210 | + if (abs(cj[0]) > 1.0) | |
1211 | + cy[1] = (cj[1]*cy[0]-2.0/(M_PI*z))/cj[0]; | |
1212 | + for (k=2;k<=nm;k++) { | |
1213 | + if (abs(cj[k-1]) >= abs(cj[k-2])) | |
1214 | + cyy = (cj[k]*cy[k-1]-2.0/(M_PI*z))/cj[k-1]; | |
1215 | + else | |
1216 | + cyy = (cj[k]*cy[k-2]-4.0*(k-1.0)/(M_PI*z*z))/cj[k-2]; | |
1217 | + cy[k] = cyy; | |
1218 | + } | |
1219 | + cyp[0] = -cy[1]; | |
1220 | + for (k=1;k<=nm;k++) { | |
1221 | + cyp[k] = cy[k-1]-(P)k*cy[k]/z; | |
1222 | + } | |
1223 | + | |
1224 | + return 0; | |
1225 | +} | |
1226 | + | |
1227 | +template<typename P> | |
1228 | +int cbessjyva(P v,complex<P> z,P &vm,complex<P>*cjv, | |
1229 | + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp) | |
1230 | +{ | |
1231 | + complex<P> z1,z2,zk,cjvl,cr,ca,cjv0,cjv1,cpz,crp; | |
1232 | + complex<P> cqz,crq,ca0,cck,csk,cyv0,cyv1,cju0,cju1,cb; | |
1233 | + complex<P> cs,cs0,cr0,cs1,cr1,cec,cf,cf0,cf1,cf2; | |
1234 | + complex<P> cfac0,cfac1,cg0,cg1,cyk,cp11,cp12,cp21,cp22; | |
1235 | + complex<P> ch0,ch1,ch2,cyl1,cyl2,cylk; | |
1236 | + | |
1237 | + P a0,v0,pv0,pv1,vl,ga,gb,vg,vv,w0,w1,ya0,yak,ya1,wa; | |
1238 | + int j,n,k,kz,l,lb,lb0,m; | |
1239 | + | |
1240 | + a0 = abs(z); | |
1241 | + z1 = z; | |
1242 | + z2 = z*z; | |
1243 | + n = (int)v; | |
1244 | + | |
1245 | + | |
1246 | + v0 = v-n; | |
1247 | + | |
1248 | + pv0 = M_PI*v0; | |
1249 | + pv1 = M_PI*(1.0+v0); | |
1250 | + if (a0 < 1.0e-100) { | |
1251 | + for (k=0;k<=n;k++) { | |
1252 | + cjv[k] = czero; | |
1253 | + cyv[k] = complex<P> (-1e308,0); | |
1254 | + cjvp[k] = czero; | |
1255 | + cyvp[k] = complex<P> (1e308,0); | |
1256 | + | |
1257 | + } | |
1258 | + if (v0 == 0.0) { | |
1259 | + cjv[0] = cone; | |
1260 | + cjvp[1] = complex<P> (0.5,0.0); | |
1261 | + } | |
1262 | + else { | |
1263 | + cjvp[0] = complex<P> (1e308,0); | |
1264 | + } | |
1265 | + vm = v; | |
1266 | + return 0; | |
1267 | + } | |
1268 | + if (real(z1) < 0.0) z1 = -z; | |
1269 | + if (a0 <= 12.0) { | |
1270 | + for (l=0;l<2;l++) { | |
1271 | + vl = v0+l; | |
1272 | + cjvl = cone; | |
1273 | + cr = cone; | |
1274 | + for (k=1;k<=40;k++) { | |
1275 | + cr *= -0.25*z2/(k*(k+vl)); | |
1276 | + cjvl += cr; | |
1277 | + if (abs(cr) < abs(cjvl)*eps) break; | |
1278 | + } | |
1279 | + vg = 1.0 + vl; | |
1280 | + ga = gamma(vg); | |
1281 | + ca = pow(0.5*z1,vl)/ga; | |
1282 | + if (l == 0) cjv0 = cjvl*ca; | |
1283 | + else cjv1 = cjvl*ca; | |
1284 | + } | |
1285 | + } | |
1286 | + else { | |
1287 | + if (a0 >= 50.0) kz = 8; | |
1288 | + else if (a0 >= 35.0) kz = 10; | |
1289 | + else kz = 11; | |
1290 | + for (j=0;j<2;j++) { | |
1291 | + vv = 4.0*(j+v0)*(j+v0); | |
1292 | + cpz = cone; | |
1293 | + crp = cone; | |
1294 | + for (k=1;k<=kz;k++) { | |
1295 | + crp = -0.78125e-2*crp*(vv-pow(4.0*k-3.0,2.0))* | |
1296 | + (vv-pow(4.0*k-1.0,2.0))/(k*(2.0*k-1.0)*z2); | |
1297 | + cpz += crp; | |
1298 | + } | |
1299 | + cqz = cone; | |
1300 | + crq = cone; | |
1301 | + for (k=1;k<=kz;k++) { | |
1302 | + crq = -0.78125e-2*crq*(vv-pow(4.0*k-1.0,2.0))* | |
1303 | + (vv-pow(4.0*k+1.0,2.0))/(k*(2.0*k+1.0)*z2); | |
1304 | + cqz += crq; | |
1305 | + } | |
1306 | + cqz *= 0.125*(vv-1.0)/z1; | |
1307 | + zk = z1-(0.5*(j+v0)+0.25)*M_PI; | |
1308 | + ca0 = sqrt(M_2_PI/z1); | |
1309 | + cck = cos(zk); | |
1310 | + csk = sin(zk); | |
1311 | + if (j == 0) { | |
1312 | + cjv0 = ca0*(cpz*cck-cqz*csk); | |
1313 | + cyv0 = ca0*(cpz*csk+cqz+cck); | |
1314 | + } | |
1315 | + else { | |
1316 | + cjv1 = ca0*(cpz*cck-cqz*csk); | |
1317 | + cyv1 = ca0*(cpz*csk+cqz*cck); | |
1318 | + } | |
1319 | + } | |
1320 | + } | |
1321 | + if (a0 <= 12.0) { | |
1322 | + if (v0 != 0.0) { | |
1323 | + for (l=0;l<2;l++) { | |
1324 | + vl = v0+l; | |
1325 | + cjvl = cone; | |
1326 | + cr = cone; | |
1327 | + for (k=1;k<=40;k++) { | |
1328 | + cr *= -0.25*z2/(k*(k-vl)); | |
1329 | + cjvl += cr; | |
1330 | + if (abs(cr) < abs(cjvl)*eps) break; | |
1331 | + } | |
1332 | + vg = 1.0-vl; | |
1333 | + gb = gamma(vg); | |
1334 | + cb = pow(2.0/z1,vl)/gb; | |
1335 | + if (l == 0) cju0 = cjvl*cb; | |
1336 | + else cju1 = cjvl*cb; | |
1337 | + } | |
1338 | + cyv0 = (cjv0*cos(pv0)-cju0)/sin(pv0); | |
1339 | + cyv1 = (cjv1*cos(pv1)-cju1)/sin(pv1); | |
1340 | + } | |
1341 | + else { | |
1342 | + cec = log(0.5*z1)+el; | |
1343 | + cs0 = czero; | |
1344 | + w0 = 0.0; | |
1345 | + cr0 = cone; | |
1346 | + for (k=1;k<=30;k++) { | |
1347 | + w0 += 1.0/k; | |
1348 | + cr0 *= -0.25*z2/(P)(k*k); | |
1349 | + cs0 += cr0*w0; | |
1350 | + } | |
1351 | + cyv0 = M_2_PI*(cec*cjv0-cs0); | |
1352 | + cs1 = cone; | |
1353 | + w1 = 0.0; | |
1354 | + cr1 = cone; | |
1355 | + for (k=1;k<=30;k++) { | |
1356 | + w1 += 1.0/k; | |
1357 | + cr1 *= -0.25*z2/(k*(k+1.0)); | |
1358 | + cs1 += cr1*(2.0*w1+1.0/(k+1.0)); | |
1359 | + } | |
1360 | + cyv1 = M_2_PI*(cec*cjv1-1.0/z1-0.25*z1*cs1); | |
1361 | + } | |
1362 | + } | |
1363 | + if (real(z) < 0.0) { | |
1364 | + cfac0 = exp(pv0*cii); | |
1365 | + cfac1 = exp(pv1*cii); | |
1366 | + if (imag(z) < 0.0) { | |
1367 | + cyv0 = cfac0*cyv0-(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; | |
1368 | + cyv1 = cfac1*cyv1-(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; | |
1369 | + cjv0 /= cfac0; | |
1370 | + cjv1 /= cfac1; | |
1371 | + } | |
1372 | + else if (imag(z) > 0.0) { | |
1373 | + cyv0 = cyv0/cfac0+(P)2.0*(complex<P>)cii*cos(pv0)*cjv0; | |
1374 | + cyv1 = cyv1/cfac1+(P)2.0*(complex<P>)cii*cos(pv1)*cjv1; | |
1375 | + cjv0 *= cfac0; | |
1376 | + cjv1 *= cfac1; | |
1377 | + } | |
1378 | + } | |
1379 | + cjv[0] = cjv0; | |
1380 | + cjv[1] = cjv1; | |
1381 | + if ((n >= 2) && (n <= (int)(0.25*a0))) { | |
1382 | + cf0 = cjv0; | |
1383 | + cf1 = cjv1; | |
1384 | + for (k=2;k<= n;k++) { | |
1385 | + cf = 2.0*(k+v0-1.0)*cf1/z-cf0; | |
1386 | + cjv[k] = cf; | |
1387 | + cf0 = cf1; | |
1388 | + cf1 = cf; | |
1389 | + } | |
1390 | + } | |
1391 | + else if (n >= 2) { | |
1392 | + m = msta1(a0,200); | |
1393 | + if (m < n) n = m; | |
1394 | + else m = msta2(a0,n,15); | |
1395 | + cf2 = czero; | |
1396 | + cf1 = complex<P>(1.0e-100,0.0); | |
1397 | + for (k=m;k>=0;k--) { | |
1398 | + cf = 2.0*(v0+k+1.0)*cf1/z-cf2; | |
1399 | + if (k <= n) cjv[k] = cf; | |
1400 | + cf2 = cf1; | |
1401 | + cf1 = cf; | |
1402 | + } | |
1403 | + if (abs(cjv0) > abs(cjv1)) cs = cjv0/cf; | |
1404 | + else cs = cjv1/cf2; | |
1405 | + for (k=0;k<=n;k++) { | |
1406 | + cjv[k] *= cs; | |
1407 | + } | |
1408 | + } | |
1409 | + cjvp[0] = v0*cjv[0]/z-cjv[1]; | |
1410 | + for (k=1;k<=n;k++) { | |
1411 | + cjvp[k] = -(k+v0)*cjv[k]/z+cjv[k-1]; | |
1412 | + } | |
1413 | + cyv[0] = cyv0; | |
1414 | + cyv[1] = cyv1; | |
1415 | + ya0 = abs(cyv0); | |
1416 | + lb = 0; | |
1417 | + cg0 = cyv0; | |
1418 | + cg1 = cyv1; | |
1419 | + for (k=2;k<=n;k++) { | |
1420 | + cyk = 2.0*(v0+k-1.0)*cg1/z-cg0; | |
1421 | + yak = abs(cyk); | |
1422 | + ya1 = abs(cg0); | |
1423 | + if ((yak < ya0) && (yak< ya1)) lb = k; | |
1424 | + cyv[k] = cyk; | |
1425 | + cg0 = cg1; | |
1426 | + cg1 = cyk; | |
1427 | + } | |
1428 | + lb0 = 0; | |
1429 | + if ((lb > 4) && (imag(z) != 0.0)) { | |
1430 | + while(lb != lb0) { | |
1431 | + ch2 = cone; | |
1432 | + ch1 = czero; | |
1433 | + lb0 = lb; | |
1434 | + for (k=lb;k>=1;k--) { | |
1435 | + ch0 = 2.0*(k+v0)*ch1/z-ch2; | |
1436 | + ch2 = ch1; | |
1437 | + ch1 = ch0; | |
1438 | + } | |
1439 | + cp12 = ch0; | |
1440 | + cp22 = ch2; | |
1441 | + ch2 = czero; | |
1442 | + ch1 = cone; | |
1443 | + for (k=lb;k>=1;k--) { | |
1444 | + ch0 = 2.0*(k+v0)*ch1/z-ch2; | |
1445 | + ch2 = ch1; | |
1446 | + ch1 = ch0; | |
1447 | + } | |
1448 | + cp11 = ch0; | |
1449 | + cp21 = ch2; | |
1450 | + if (lb == n) | |
1451 | + cjv[lb+1] = 2.0*(lb+v0)*cjv[lb]/z-cjv[lb-1]; | |
1452 | + if (abs(cjv[0]) > abs(cjv[1])) { | |
1453 | + cyv[lb+1] = (cjv[lb+1]*cyv0-2.0*cp11/(M_PI*z))/cjv[0]; | |
1454 | + cyv[lb] = (cjv[lb]*cyv0+2.0*cp12/(M_PI*z))/cjv[0]; | |
1455 | + } | |
1456 | + else { | |
1457 | + cyv[lb+1] = (cjv[lb+1]*cyv1-2.0*cp21/(M_PI*z))/cjv[1]; | |
1458 | + cyv[lb] = (cjv[lb]*cyv1+2.0*cp22/(M_PI*z))/cjv[1]; | |
1459 | + } | |
1460 | + cyl2 = cyv[lb+1]; | |
1461 | + cyl1 = cyv[lb]; | |
1462 | + for (k=lb-1;k>=0;k--) { | |
1463 | + cylk = 2.0*(k+v0+1.0)*cyl1/z-cyl2; | |
1464 | + cyv[k] = cylk; | |
1465 | + cyl2 = cyl1; | |
1466 | + cyl1 = cylk; | |
1467 | + } | |
1468 | + cyl1 = cyv[lb]; | |
1469 | + cyl2 = cyv[lb+1]; | |
1470 | + for (k=lb+1;k<n;k++) { | |
1471 | + cylk = 2.0*(k+v0)*cyl2/z-cyl1; | |
1472 | + cyv[k+1] = cylk; | |
1473 | + cyl1 = cyl2; | |
1474 | + cyl2 = cylk; | |
1475 | + } | |
1476 | + for (k=2;k<=n;k++) { | |
1477 | + wa = abs(cyv[k]); | |
1478 | + if (wa < abs(cyv[k-1])) lb = k; | |
1479 | + } | |
1480 | + } | |
1481 | + } | |
1482 | + cyvp[0] = v0*cyv[0]/z-cyv[1]; | |
1483 | + for (k=1;k<=n;k++) { | |
1484 | + cyvp[k] = cyv[k-1]-(k+v0)*cyv[k]/z; | |
1485 | + } | |
1486 | + vm = n+v0; | |
1487 | + return 0; | |
1488 | +} | |
1489 | + | |
1490 | +template<typename P> | |
1491 | +int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv, | |
1492 | + complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp) | |
1493 | +{ | |
1494 | + //first, compute the bessel functions of fractional order | |
1495 | + cbessjyva<P>(v + 0.5, z, vm, cjv, cyv, cjvp, cyvp); | |
1496 | + | |
1497 | + //iterate through each and scale | |
1498 | + for(int n = 0; n<=v; n++) | |
1499 | + { | |
1500 | + | |
1501 | + cjv[n] = cjv[n] * sqrt(stim::PI/(z * 2.0)); | |
1502 | + cyv[n] = cyv[n] * sqrt(stim::PI/(z * 2.0)); | |
1503 | + | |
1504 | + cjvp[n] = -1.0 / (z * 2.0) * cjv[n] + cjvp[n] * sqrt(stim::PI / (z * 2.0)); | |
1505 | + cyvp[n] = -1.0 / (z * 2.0) * cyv[n] + cyvp[n] * sqrt(stim::PI / (z * 2.0)); | |
1506 | + } | |
1507 | + | |
1508 | + return 0; | |
1509 | + | |
1510 | +} | |
1511 | + | |
1512 | +} //end namespace rts | |
1513 | + | |
1514 | + | |
1515 | +#endif | ... | ... |
stim/math/filters/gauss3.h
... | ... | @@ -13,7 +13,7 @@ namespace stim |
13 | 13 | ///@param dimx is the size of in* in the z direction. |
14 | 14 | ///@param stdx is the standard deviation (in pixels) along the x axis. |
15 | 15 | ///@param stdy is the standard deviation (in pixels) along the y axis. |
16 | - ///@param nstds specifies the number of standard deviations of the Gaussian that will be k ept in the kernel. | |
16 | + ///@param nstds specifies the number of standard deviations of the Gaussian that will be kept in the kernel. | |
17 | 17 | template<typename T, typename K> |
18 | 18 | void cpu_gauss3(T* in, K dimx, K dimy, K dimz, K stdx, K stdy, K stdz, size_t nstds = 3) |
19 | 19 | { | ... | ... |
stim/math/matrix.h
... | ... | @@ -37,6 +37,20 @@ struct matrix |
37 | 37 | return *this; |
38 | 38 | } |
39 | 39 | |
40 | + //create a symmetric matrix given the rhs values, given in column-major order | |
41 | + CUDA_CALLABLE void setsym(T rhs[(N*N+N)/2]){ | |
42 | + const size_t L = (N*N+N)/2; //store the number of values | |
43 | + | |
44 | + size_t r, c; | |
45 | + r = c = 0; | |
46 | + for(size_t i = 0; i < L; i++){ //for each value | |
47 | + if(r == c) M[c * N + r] = rhs[i]; | |
48 | + else M[c*N + r] = M[r * N + c] = rhs[i]; | |
49 | + r++; | |
50 | + if(r == N) r = ++c; | |
51 | + } | |
52 | + } | |
53 | + | |
40 | 54 | CUDA_CALLABLE T& operator()(int row, int col) |
41 | 55 | { |
42 | 56 | return M[col * N + row]; |
... | ... | @@ -91,6 +105,14 @@ struct matrix |
91 | 105 | |
92 | 106 | return ss.str(); |
93 | 107 | } |
108 | + | |
109 | + static matrix<T, N> identity() { | |
110 | + matrix<T, N> I; | |
111 | + I = 0; | |
112 | + for (size_t i = 0; i < N; i++) | |
113 | + I.M[i * N + i] = 1; | |
114 | + return I; | |
115 | + } | |
94 | 116 | }; |
95 | 117 | |
96 | 118 | } //end namespace rts | ... | ... |
1 | +#ifndef STIM_MATRIX_SYM_H | |
2 | +#define STIM_MATRIX_SYM_H | |
3 | + | |
4 | +#include <stim/cuda/cudatools/callable.h> | |
5 | +#include <stim/math/matrix.h> | |
6 | + | |
7 | +/* This class represents a rank 2, 3-dimensional tensor viable | |
8 | +for representing tensor fields such as structure and diffusion tensors | |
9 | +*/ | |
10 | +namespace stim{ | |
11 | + | |
12 | +template <typename T, int D> | |
13 | +class matrix_sym{ | |
14 | + | |
15 | +protected: | |
16 | + //values are stored in column-major order as a lower-triangular matrix | |
17 | + T M[D*(D + 1)/2]; | |
18 | + | |
19 | + static size_t idx(size_t r, size_t c) { | |
20 | + //if the index is in the upper-triangular portion, swap the indices | |
21 | + if(r < c){ | |
22 | + size_t t = r; | |
23 | + r = c; | |
24 | + c = t; | |
25 | + } | |
26 | + | |
27 | + size_t ci = (c + 1) * (D + (D - c))/2 - 1; //index to the end of column c | |
28 | + size_t i = ci - (D - r - 1); | |
29 | + return i; | |
30 | + } | |
31 | + | |
32 | + //calculate the row and column given an index | |
33 | + //static void indices(size_t& r, size_t& c, size_t idx) { | |
34 | + // size_t col = 0; | |
35 | + // for ( ; col < D; col++) | |
36 | + // if(idx <= ((D - col + D) * (col + 1)/2 - 1)) | |
37 | + // break; | |
38 | + | |
39 | + // c = col; | |
40 | + // size_t ci = (D - (col - 1) + D) * col / 2 - 1; //index to the end of last column col -1 | |
41 | + // r = idx - ci + c - 1; | |
42 | + //} | |
43 | + static void indices(size_t& r, size_t& c, size_t idx) { | |
44 | + size_t cf = -1/2 * sqrt(4 * D * D + 4 * D - (7 + 8 * idx)) + D - 1/2; | |
45 | + c = ceil(cf); | |
46 | + r = idx - D * c + c * (c + 1) / 2; | |
47 | + } | |
48 | + | |
49 | +public: | |
50 | + //return the symmetric matrix associated with this tensor | |
51 | + stim::matrix<T, D> mat() { | |
52 | + stim::matrix<T, D> r; | |
53 | + r.setsym(M); | |
54 | + return r; | |
55 | + } | |
56 | + | |
57 | + CUDA_CALLABLE T& operator()(int r, int c) { | |
58 | + return M[idx(r, c)]; | |
59 | + } | |
60 | + | |
61 | + CUDA_CALLABLE matrix_sym<T, D> operator=(T rhs) { | |
62 | + int Nsq = D*(D+1)/2; | |
63 | + for(int i=0; i<Nsq; i++) | |
64 | + M[i] = rhs; | |
65 | + | |
66 | + return *this; | |
67 | + } | |
68 | + | |
69 | + CUDA_CALLABLE matrix_sym<T, D> operator=(matrix_sym<T, D> rhs) { | |
70 | + size_t N = D * (D + 1) / 2; | |
71 | + for (size_t i = 0; i < N; i++) M[i] = rhs.M[i]; | |
72 | + return *this; | |
73 | + } | |
74 | + | |
75 | + CUDA_CALLABLE T trace() { | |
76 | + T tr = 0; | |
77 | + for (size_t i = 0; i < D; i++) //for each diagonal value | |
78 | + tr += M[idx(i, i)]; //add the value on the diagonal | |
79 | + return tr; | |
80 | + } | |
81 | + // overload matrix multiply scalar | |
82 | + CUDA_CALLABLE void operator_product(matrix_sym<T, D> &B, T rhs) { | |
83 | + int Nsq = D*(D+1)/2; | |
84 | + for(int i=0; i<Nsq; i++) | |
85 | + B.M[i] *= rhs; | |
86 | + } | |
87 | + | |
88 | + //return the tensor as a string | |
89 | + std::string str() { | |
90 | + std::stringstream ss; | |
91 | + for(int r = 0; r < D; r++){ | |
92 | + ss << "| "; | |
93 | + for(int c=0; c<D; c++) | |
94 | + { | |
95 | + ss << (*this)(r, c) << " "; | |
96 | + } | |
97 | + ss << "|" << std::endl; | |
98 | + } | |
99 | + | |
100 | + return ss.str(); | |
101 | + } | |
102 | + | |
103 | + //returns an identity matrix | |
104 | + static matrix_sym<T, D> identity() { | |
105 | + matrix_sym<T, D> I; | |
106 | + I = 0; | |
107 | + for (size_t i = 0; i < D; i++) | |
108 | + I.M[matrix_sym<T, D>::idx(i, i)] = 1; | |
109 | + return I; | |
110 | + } | |
111 | +}; | |
112 | + | |
113 | + | |
114 | + | |
115 | +} //end namespace stim | |
116 | + | |
117 | + | |
118 | +#endif | ... | ... |
1 | +#ifndef STIM_TENSOR2_H | |
2 | +#define STIM_TENSOR2_H | |
3 | + | |
4 | +#include "matrix_sym.h" | |
5 | + | |
6 | +namespace stim { | |
7 | + | |
8 | +/*This class represents a symmetric rank-2 2D tensor, useful for structure tensors | |
9 | +*/ | |
10 | +template<typename T> | |
11 | +class tensor2 : public matrix_sym<T, 2> { | |
12 | + | |
13 | +protected: | |
14 | + | |
15 | +public: | |
16 | + | |
17 | + //calculate the eigenvectors and eigenvalues of the tensor | |
18 | + CUDA_CALLABLE void eig(stim::matrix<T, 2>& v, stim::matrix<T, 2>& lambda) { | |
19 | + | |
20 | + lambda = 0; //initialize the eigenvalue matrix to zero | |
21 | + | |
22 | + T t = M[0] + M[2]; //calculate the trace of the tensor | |
23 | + T d = M[0] * M[2] - M[1] * M[1]; //calculate the determinant of the tensor | |
24 | + | |
25 | + lambda(0, 0) = t / 2 + sqrt(t*t / 4 - d); | |
26 | + lambda(1, 1) = t / 2 - sqrt(t*t / 4 - d); | |
27 | + | |
28 | + if (M[1] == 0) { | |
29 | + v = stim::matrix<T, 2>::identity(); | |
30 | + } | |
31 | + else { | |
32 | + v(0, 0) = lambda(0, 0) - d; | |
33 | + v(0, 1) = lambda(1, 1) - d; | |
34 | + v(1, 0) = v(1, 1) = M[1]; | |
35 | + } | |
36 | + } | |
37 | + | |
38 | + CUDA_CALLABLE tensor2<T> operator=(stim::matrix_sym<T, 2> rhs){ | |
39 | + stim::matrix_sym<T, 2>::operator=(rhs); | |
40 | + return *this; | |
41 | + } | |
42 | +}; | |
43 | + | |
44 | + | |
45 | +} //end namespace stim | |
46 | + | |
47 | + | |
48 | +#endif | |
0 | 49 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_TENSOR3_H | |
2 | +#define STIM_TENSOR3_H | |
3 | + | |
4 | +#include "matrix_sym.h" | |
5 | +#include <stim/math/constants.h> | |
6 | + | |
7 | +namespace stim { | |
8 | + | |
9 | + /*This class represents a symmetric rank-2 2D tensor, useful for structure tensors | |
10 | + */ | |
11 | + | |
12 | + //Matrix ID cheat sheet | |
13 | + // | 0 1 2 | | |
14 | + // | 1 3 4 | | |
15 | + // | 2 4 5 | | |
16 | + template<typename T> | |
17 | + class tensor3 : public matrix_sym<T, 3> { | |
18 | + | |
19 | + protected: | |
20 | + | |
21 | + public: | |
22 | + | |
23 | + //calculates the determinant of the tensor | |
24 | + CUDA_CALLABLE T det() { | |
25 | + return M[0] * M[3] * M[5] + 2 * (M[1] * M[4] * M[2]) - M[2] * M[3] * M[2] - M[1] * M[1] * M[5] - M[0] * M[4] * M[4]; | |
26 | + } | |
27 | + | |
28 | + //calculate the eigenvalues for the tensor | |
29 | + //adapted from https://en.wikipedia.org/wiki/Eigenvalue_algorithm | |
30 | + | |
31 | + CUDA_CALLABLE stim::vec3<T> lambda() { | |
32 | + stim::vec3<T> lam; | |
33 | + T p1 = M[1] * M[1] + M[2] * M[2] + M[4] * M[4]; //calculate the sum of the squared off-diagonal values | |
34 | + if (p1 == 0) { //if this value is zero, the matrix is diagonal | |
35 | + lam[0] = M[0]; //the eigenvalues are the diagonal values | |
36 | + lam[1] = M[3]; | |
37 | + lam[2] = M[5]; | |
38 | + return lam; //return the eigenvalue vector | |
39 | + } | |
40 | + | |
41 | + T tr = matrix_sym<T, 3>::trace(); //calculate the trace of the matrix | |
42 | + T q = tr / 3; | |
43 | + T p2 = (M[0] - q) * (M[0] - q) + (M[3] - q) * (M[3] - q) + (M[5] - q) * (M[5] - q) + 2 * p1; | |
44 | + T p = sqrt(p2 / 6); | |
45 | + tensor3<T> Q; //allocate space for Q (q along the diagonals) | |
46 | + Q = (T)0; //initialize Q to zeros | |
47 | + Q(0, 0) = Q(1, 1) = Q(2, 2) = q; //set the diagonal values to q | |
48 | + tensor3<T> B = *this; // B1 = A | |
49 | + B.M[0] = (B.M[0] - q); | |
50 | + B.M[3] = (B.M[3] - q); | |
51 | + B.M[5] = (B.M[5] - q); | |
52 | + matrix_sym<T, 3>::operator_product(B, 1/p); // B = (1/p) * (A - q*I) | |
53 | + //B.M[0] = B.M[0] * 1/p; | |
54 | + //B.M[1] = B.M[1] * 1/p; | |
55 | + //B.M[2] = B.M[2] * 1/p; | |
56 | + //B.M[3] = B.M[3] * 1/p; | |
57 | + //B.M[4] = B.M[4] * 1/p; | |
58 | + //B.M[5] = B.M[5] * 1/p; | |
59 | + T r = B.det() / 2; //calculate det(B) / 2 | |
60 | + | |
61 | + // In exact arithmetic for a symmetric matrix - 1 <= r <= 1 | |
62 | + // but computation error can leave it slightly outside this range. | |
63 | + T phi; | |
64 | + if (r <= -1) phi = stim::PI / 3; | |
65 | + else if (r >= 1) phi = 0; | |
66 | + else phi = acos(r) / 3; | |
67 | + | |
68 | + // the eigenvalues satisfy eig3 >= eig2 >= eig1 | |
69 | + lam[2] = q + 2 * p * cos(phi); | |
70 | + lam[0] = q + 2 * p * cos(phi + (2 * stim::PI / 3)); | |
71 | + lam[1] = 3 * q - (lam[2] + lam[0]); | |
72 | + | |
73 | + return lam; | |
74 | + } | |
75 | + | |
76 | + CUDA_CALLABLE stim::matrix<T, 3> eig(stim::vec3<T>& lambda = stim::vec3<T>()) { | |
77 | + stim::matrix<T, 3> V; | |
78 | + | |
79 | + stim::matrix<T, 3> M1 = matrix_sym<T, 3>::mat(); | |
80 | + stim::matrix<T, 3> M2 = matrix_sym<T, 3>::mat(); | |
81 | + stim::matrix<T, 3> M3 = matrix_sym<T, 3>::mat(); // fill a tensor with symmetric values | |
82 | + | |
83 | + M1.operator_minus(M1, lambda[0]); // M1 = A - lambda[0] * I | |
84 | + | |
85 | + M2.operator_minus(M2, lambda[1]); // M2 = A - lambda[1] * I | |
86 | + | |
87 | + M3.operator_minus(M3, lambda[2]); // M3 = A - lambda[2] * I | |
88 | + | |
89 | + T Mod = 0; // module of one column | |
90 | + | |
91 | + T tmp1[9] = {0}; | |
92 | + for(int i = 0; i < 9; i++) { | |
93 | + for(int j = 0; j < 3; j++){ | |
94 | + tmp1[i] += M2(i%3, j) * M3(j, i/3); | |
95 | + } | |
96 | + } | |
97 | + if(tmp1[0] * tmp1[1] * tmp1[2] != 0) { // test whether it is zero column | |
98 | + Mod = sqrt(pow(tmp1[0],2) + pow(tmp1[1],2) + pow(tmp1[2],2)); | |
99 | + V(0, 0) = tmp1[0]/Mod; | |
100 | + V(1, 0) = tmp1[1]/Mod; | |
101 | + V(2, 0) = tmp1[2]/Mod; | |
102 | + } | |
103 | + else { | |
104 | + Mod = sqrt(pow(tmp1[3],2) + pow(tmp1[4],2) + pow(tmp1[5],2)); | |
105 | + V(0, 0) = tmp1[3]/Mod; | |
106 | + V(1, 0) = tmp1[4]/Mod; | |
107 | + V(2, 0) = tmp1[5]/Mod; | |
108 | + } | |
109 | + | |
110 | + T tmp2[9] = {0}; | |
111 | + for(int i = 0; i < 9; i++) { | |
112 | + for(int j = 0; j < 3; j++){ | |
113 | + tmp2[i] += M1(i%3, j) * M3(j, i/3); | |
114 | + } | |
115 | + } | |
116 | + if(tmp2[0] * tmp2[1] * tmp2[2] != 0) { | |
117 | + Mod = sqrt(pow(tmp2[0],2) + pow(tmp2[1],2) + pow(tmp2[2],2)); | |
118 | + V(0, 1) = tmp2[0]/Mod; | |
119 | + V(1, 1) = tmp2[1]/Mod; | |
120 | + V(2, 1) = tmp2[2]/Mod; | |
121 | + } | |
122 | + else { | |
123 | + Mod = sqrt(pow(tmp2[3],2) + pow(tmp2[4],2) + pow(tmp2[5],2)); | |
124 | + V(0, 1) = tmp2[3]/Mod; | |
125 | + V(1, 1) = tmp2[4]/Mod; | |
126 | + V(2, 1) = tmp2[5]/Mod; | |
127 | + } | |
128 | + | |
129 | + T tmp3[9] = {0}; | |
130 | + for(int i = 0; i < 9; i++) { | |
131 | + for(int j = 0; j < 3; j++){ | |
132 | + tmp3[i] += M1(i%3, j) * M2(j, i/3); | |
133 | + } | |
134 | + } | |
135 | + if(tmp3[0] * tmp3[1] * tmp3[2] != 0) { | |
136 | + Mod = sqrt(pow(tmp3[0],2) + pow(tmp3[1],2) + pow(tmp3[2],2)); | |
137 | + V(0, 2) = tmp3[0]/Mod; | |
138 | + V(1, 2) = tmp3[1]/Mod; | |
139 | + V(2, 2) = tmp3[2]/Mod; | |
140 | + } | |
141 | + else { | |
142 | + Mod = sqrt(pow(tmp3[3],2) + pow(tmp3[4],2) + pow(tmp3[5],2)); | |
143 | + V(0, 2) = tmp3[3]/Mod; | |
144 | + V(1, 2) = tmp3[4]/Mod; | |
145 | + V(2, 2) = tmp3[5]/Mod; | |
146 | + } | |
147 | + return V; //return the eigenvector matrix | |
148 | + } | |
149 | + // return one specific eigenvector | |
150 | + CUDA_CALLABLE stim::vec3<T> eig(int n, stim::vec3<T>& lambda = stim::vec3<T>()) { | |
151 | + stim::matrix<T, 3> V = eig(lambda); | |
152 | + stim::vec3<T> v; | |
153 | + for(int i = 0; i < 3; i++) | |
154 | + v[i] = V(i, n); | |
155 | + return v; | |
156 | + } | |
157 | + | |
158 | + | |
159 | + CUDA_CALLABLE T linear(stim::vec3<T>& lambda = stim::vec3<T>()) { | |
160 | + T cl = (lambda[2] - lambda[1]) / (lambda[0] + lambda[1] + lambda[2]); | |
161 | + return cl; | |
162 | + } | |
163 | + | |
164 | + CUDA_CALLABLE T Planar(stim::vec3<T>& lambda = stim::vec3<T>()) { | |
165 | + T cp = 2 * (lambda[1] - lambda[0]) / (lambda[0] + lambda[1] + lambda[2]); | |
166 | + return cp; | |
167 | + } | |
168 | + | |
169 | + CUDA_CALLABLE T spherical(stim::vec3<T>& lambda = stim::vec3<T>()) { | |
170 | + T cs = 3 * lambda[0] / (lambda[0] + lambda[1] + lambda[2]); | |
171 | + return cs; | |
172 | + } | |
173 | + | |
174 | + CUDA_CALLABLE T fa(stim::vec3<T>& lambda = stim::vec3<T>()) { | |
175 | + T fa = sqrt(1/2) * sqrt(pow(lambda[2] - lambda[1], 2) + pow(lambda[1] - lambda[0], 2) + pow(lambda[0] - lambda[2], 2)) / sqrt(pow(lambda[2], 2) + pow(lambda[1], 2) + pow(lambda[0], 2)); | |
176 | + } | |
177 | + //JACK 2: write functions to calculate anisotropy | |
178 | + //ex: fa(), linear(), planar(), spherical() | |
179 | + | |
180 | + | |
181 | + //calculate the eigenvectors and eigenvalues of the tensor | |
182 | + //CUDA_CALLABLE void eig(stim::matrix<T, 3>& v, stim::matrix<T, 3>& lambda){ | |
183 | + | |
184 | + //} | |
185 | + CUDA_CALLABLE tensor3<T> operator=(T rhs) { | |
186 | + stim::matrix_sym<T, 3>::operator=(rhs); | |
187 | + return *this; | |
188 | + } | |
189 | + | |
190 | + CUDA_CALLABLE tensor3<T> operator=(stim::matrix_sym<T, 3> rhs) { | |
191 | + stim::matrix_sym<T, 3>::operator=(rhs); | |
192 | + return *this; | |
193 | + } | |
194 | + }; | |
195 | + | |
196 | + | |
197 | +} //end namespace stim | |
198 | + | |
199 | + | |
200 | +#endif | |
0 | 201 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | +#include <cmath> | |
7 | + | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | + | |
/// A class designed to act as a 3D vector with CUDA compatibility.
///
/// The three components are held in a fixed-size array in x, y, z order.
template<typename T>
class vec3{

protected:
    T ptr[3];                           // components in x, y, z order

public:

    /// Default constructor; the components are left uninitialized.
    CUDA_CALLABLE vec3(){}

    /// Sets all three components to v.
    CUDA_CALLABLE vec3(T v){
        ptr[0] = ptr[1] = ptr[2] = v;
    }

    /// Builds the vector (x, y, z).
    CUDA_CALLABLE vec3(T x, T y, T z){
        ptr[0] = x;
        ptr[1] = y;
        ptr[2] = z;
    }

    /// Copy constructor.
    CUDA_CALLABLE vec3( const vec3<T>& other){
        ptr[0] = other.ptr[0];
        ptr[1] = other.ptr[1];
        ptr[2] = other.ptr[2];
    }

    /// Access an element using an index; no bounds check is performed.
    CUDA_CALLABLE T& operator[](size_t idx){
        return ptr[idx];
    }

    /// Read-only element access, so const vectors can be indexed as well.
    CUDA_CALLABLE const T& operator[](size_t idx) const{
        return ptr[idx];
    }

    /// Pointer to the first of the three contiguous components.
    CUDA_CALLABLE T* data(){
        return ptr;
    }

    CUDA_CALLABLE const T* data() const{
        return ptr;
    }

    /// Casting operator. Creates a new vector with a new type U.
    /// The result is filled through operator[] because ptr of vec3<U> is a
    /// protected member of a different class and cannot be touched from here.
    template< typename U >
    CUDA_CALLABLE operator vec3<U>() const{
        vec3<U> result;
        result[0] = (U)ptr[0];
        result[1] = (U)ptr[1];
        result[2] = (U)ptr[2];

        return result;
    }

    /// Computes the squared Euclidean length (useful where only >, =, or < matter).
    CUDA_CALLABLE T len_sq() const{
        return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
    }

    /// Computes the Euclidean length of the vector.
    CUDA_CALLABLE T len() const{
        return std::sqrt(len_sq());
    }

    /// Convert the vector from cartesian to spherical coordinates
    /// (x, y, z -> r, theta, phi). theta is std::atan2(y, x) and phi is the
    /// angle from the z axis; phi is set to 0 for the zero vector.
    CUDA_CALLABLE vec3<T> cart2sph() const{
        vec3<T> sph;
        sph.ptr[0] = len();
        sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
        if(sph.ptr[0] == 0)
            sph.ptr[2] = 0;
        else
            sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
        return sph;
    }

    /// Convert the vector from spherical to cartesian coordinates
    /// (r, theta, phi -> x, y, z), the inverse of cart2sph().
    CUDA_CALLABLE vec3<T> sph2cart() const{
        vec3<T> cart;
        cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

        return cart;
    }

    /// Computes the normalized vector (each coordinate divided by the L2 norm).
    /// The length is not checked against zero.
    CUDA_CALLABLE vec3<T> norm() const{
        return (*this) / len();
    }

    /// Computes the cross product of this vector with rhs.
    CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

        vec3<T> result;

        result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
        result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
        result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

        return result;
    }

    /// Compute the Euclidean inner (dot) product.
    CUDA_CALLABLE T dot(vec3<T> rhs) const{
        return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
    }

    /// Arithmetic addition operator
    /// @param rhs is the right-hand-side operand for the addition
    CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs[0];
        result.ptr[1] = ptr[1] + rhs[1];
        result.ptr[2] = ptr[2] + rhs[2];
        return result;
    }

    /// Arithmetic addition of a scalar to every component
    /// @param rhs is the scalar to add
    CUDA_CALLABLE vec3<T> operator+(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs;
        result.ptr[1] = ptr[1] + rhs;
        result.ptr[2] = ptr[2] + rhs;
        return result;
    }

    /// Arithmetic subtraction operator
    /// @param rhs is the right-hand-side operand for the subtraction
    CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs[0];
        result.ptr[1] = ptr[1] - rhs[1];
        result.ptr[2] = ptr[2] - rhs[2];
        return result;
    }

    /// Arithmetic subtraction of a scalar from every component
    /// @param rhs is the scalar to subtract
    CUDA_CALLABLE vec3<T> operator-(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs;
        result.ptr[1] = ptr[1] - rhs;
        result.ptr[2] = ptr[2] - rhs;
        return result;
    }

    /// Arithmetic scalar multiplication operator
    /// @param rhs is the scalar factor
    CUDA_CALLABLE vec3<T> operator*(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] * rhs;
        result.ptr[1] = ptr[1] * rhs;
        result.ptr[2] = ptr[2] * rhs;
        return result;
    }

    /// Arithmetic scalar division operator
    /// @param rhs is the scalar divisor
    CUDA_CALLABLE vec3<T> operator/(T rhs) const{
        return (*this) * ((T)1.0/rhs);
    }

    /// Multiplication by a scalar, followed by assignment
    CUDA_CALLABLE vec3<T> operator*=(T rhs){
        ptr[0] = ptr[0] * rhs;
        ptr[1] = ptr[1] * rhs;
        ptr[2] = ptr[2] * rhs;
        return *this;
    }

    /// Addition and assignment (component-wise)
    CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
        ptr[0] = ptr[0] + rhs.ptr[0];
        ptr[1] = ptr[1] + rhs.ptr[1];
        ptr[2] = ptr[2] + rhs.ptr[2];
        return *this;
    }

    /// Assign a scalar to all values
    CUDA_CALLABLE vec3<T> & operator=(T rhs){
        ptr[0] = rhs;
        ptr[1] = rhs;
        ptr[2] = rhs;
        return *this;
    }

    /// Casting and assignment. rhs is read through operator[] because ptr of
    /// vec3<Y> is a protected member of a different class.
    template<typename Y>
    CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
        ptr[0] = (T)rhs[0];
        ptr[1] = (T)rhs[1];
        ptr[2] = (T)rhs[2];
        return *this;
    }

    /// Unary minus (returns the negative of the vector)
    CUDA_CALLABLE vec3<T> operator-() const{
        vec3<T> result;
        result.ptr[0] = -ptr[0];
        result.ptr[1] = -ptr[1];
        result.ptr[2] = -ptr[2];
        return result;
    }

    /// Outputs the vector as a string of the form "[x, y, z]"
    std::string str() const{
        std::stringstream ss;

        const size_t N = 3;

        ss<<"[";
        for(size_t i=0; i<N; i++)
        {
            ss<<ptr[i];
            if(i != N-1)
                ss<<", ";
        }
        ss<<"]";

        return ss.str();
    }

    /// Number of components (always 3)
    size_t size() const{ return 3; }

};                                      //end class vec3
249 | +} //end namespace stim | |
250 | + | |
251 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
252 | +template <typename T> | |
253 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
254 | + return rhs * lhs; | |
255 | +} | |
256 | + | |
257 | +//stream operator | |
258 | +template<typename T> | |
259 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
260 | + os<<rhs.str(); | |
261 | + return os; | |
262 | +} | |
263 | + | |
264 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | +#include <cmath> | |
7 | + | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | + | |
/// A class designed to act as a 3D vector with CUDA compatibility.
///
/// The three components are held in a fixed-size array in x, y, z order.
template<typename T>
class vec3{

protected:
    T ptr[3];                           // components in x, y, z order

public:

    /// Default constructor; the components are left uninitialized.
    CUDA_CALLABLE vec3(){}

    /// Sets all three components to v.
    CUDA_CALLABLE vec3(T v){
        ptr[0] = ptr[1] = ptr[2] = v;
    }

    /// Builds the vector (x, y, z).
    CUDA_CALLABLE vec3(T x, T y, T z){
        ptr[0] = x;
        ptr[1] = y;
        ptr[2] = z;
    }

    /// Copy constructor.
    CUDA_CALLABLE vec3( const vec3<T>& other){
        ptr[0] = other.ptr[0];
        ptr[1] = other.ptr[1];
        ptr[2] = other.ptr[2];
    }

    /// Access an element using an index; no bounds check is performed.
    CUDA_CALLABLE T& operator[](size_t idx){
        return ptr[idx];
    }

    /// Read-only element access, so const vectors can be indexed as well.
    CUDA_CALLABLE const T& operator[](size_t idx) const{
        return ptr[idx];
    }

    /// Pointer to the first of the three contiguous components.
    CUDA_CALLABLE T* data(){
        return ptr;
    }

    CUDA_CALLABLE const T* data() const{
        return ptr;
    }

    /// Casting operator. Creates a new vector with a new type U.
    /// The result is filled through operator[] because ptr of vec3<U> is a
    /// protected member of a different class and cannot be touched from here.
    template< typename U >
    CUDA_CALLABLE operator vec3<U>() const{
        vec3<U> result;
        result[0] = (U)ptr[0];
        result[1] = (U)ptr[1];
        result[2] = (U)ptr[2];

        return result;
    }

    /// Computes the squared Euclidean length (useful where only >, =, or < matter).
    CUDA_CALLABLE T len_sq() const{
        return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
    }

    /// Computes the Euclidean length of the vector.
    CUDA_CALLABLE T len() const{
        return std::sqrt(len_sq());
    }

    /// Convert the vector from cartesian to spherical coordinates
    /// (x, y, z -> r, theta, phi). theta is std::atan2(y, x) and phi is the
    /// angle from the z axis; phi is set to 0 for the zero vector.
    CUDA_CALLABLE vec3<T> cart2sph() const{
        vec3<T> sph;
        sph.ptr[0] = len();
        sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
        if(sph.ptr[0] == 0)
            sph.ptr[2] = 0;
        else
            sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
        return sph;
    }

    /// Convert the vector from spherical to cartesian coordinates
    /// (r, theta, phi -> x, y, z), the inverse of cart2sph().
    CUDA_CALLABLE vec3<T> sph2cart() const{
        vec3<T> cart;
        cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

        return cart;
    }

    /// Computes the normalized vector (each coordinate divided by the L2 norm).
    /// The length is not checked against zero.
    CUDA_CALLABLE vec3<T> norm() const{
        return (*this) / len();
    }

    /// Computes the cross product of this vector with rhs.
    CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

        vec3<T> result;

        result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
        result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
        result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

        return result;
    }

    /// Compute the Euclidean inner (dot) product.
    CUDA_CALLABLE T dot(vec3<T> rhs) const{
        return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
    }

    /// Arithmetic addition operator
    /// @param rhs is the right-hand-side operand for the addition
    CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs[0];
        result.ptr[1] = ptr[1] + rhs[1];
        result.ptr[2] = ptr[2] + rhs[2];
        return result;
    }

    /// Arithmetic addition of a scalar to every component
    /// @param rhs is the scalar to add
    CUDA_CALLABLE vec3<T> operator+(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs;
        result.ptr[1] = ptr[1] + rhs;
        result.ptr[2] = ptr[2] + rhs;
        return result;
    }

    /// Arithmetic subtraction operator
    /// @param rhs is the right-hand-side operand for the subtraction
    CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs[0];
        result.ptr[1] = ptr[1] - rhs[1];
        result.ptr[2] = ptr[2] - rhs[2];
        return result;
    }

    /// Arithmetic subtraction of a scalar from every component
    /// @param rhs is the scalar to subtract
    CUDA_CALLABLE vec3<T> operator-(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs;
        result.ptr[1] = ptr[1] - rhs;
        result.ptr[2] = ptr[2] - rhs;
        return result;
    }

    /// Arithmetic scalar multiplication operator
    /// @param rhs is the scalar factor
    CUDA_CALLABLE vec3<T> operator*(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] * rhs;
        result.ptr[1] = ptr[1] * rhs;
        result.ptr[2] = ptr[2] * rhs;
        return result;
    }

    /// Arithmetic scalar division operator
    /// @param rhs is the scalar divisor
    CUDA_CALLABLE vec3<T> operator/(T rhs) const{
        return (*this) * ((T)1.0/rhs);
    }

    /// Multiplication by a scalar, followed by assignment
    CUDA_CALLABLE vec3<T> operator*=(T rhs){
        ptr[0] = ptr[0] * rhs;
        ptr[1] = ptr[1] * rhs;
        ptr[2] = ptr[2] * rhs;
        return *this;
    }

    /// Addition and assignment (component-wise)
    CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
        ptr[0] = ptr[0] + rhs.ptr[0];
        ptr[1] = ptr[1] + rhs.ptr[1];
        ptr[2] = ptr[2] + rhs.ptr[2];
        return *this;
    }

    /// Assign a scalar to all values
    CUDA_CALLABLE vec3<T> & operator=(T rhs){
        ptr[0] = rhs;
        ptr[1] = rhs;
        ptr[2] = rhs;
        return *this;
    }

    /// Casting and assignment. rhs is read through operator[] because ptr of
    /// vec3<Y> is a protected member of a different class.
    template<typename Y>
    CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
        ptr[0] = (T)rhs[0];
        ptr[1] = (T)rhs[1];
        ptr[2] = (T)rhs[2];
        return *this;
    }

    /// Unary minus (returns the negative of the vector)
    CUDA_CALLABLE vec3<T> operator-() const{
        vec3<T> result;
        result.ptr[0] = -ptr[0];
        result.ptr[1] = -ptr[1];
        result.ptr[2] = -ptr[2];
        return result;
    }

    /// Outputs the vector as a string of the form "[x, y, z]"
    std::string str() const{
        std::stringstream ss;

        const size_t N = 3;

        ss<<"[";
        for(size_t i=0; i<N; i++)
        {
            ss<<ptr[i];
            if(i != N-1)
                ss<<", ";
        }
        ss<<"]";

        return ss.str();
    }

    /// Number of components (always 3)
    size_t size() const{ return 3; }

};                                      //end class vec3
249 | +} //end namespace stim | |
250 | + | |
251 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
252 | +template <typename T> | |
253 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
254 | + return rhs * lhs; | |
255 | +} | |
256 | + | |
257 | +//stream operator | |
258 | +template<typename T> | |
259 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
260 | + os<<rhs.str(); | |
261 | + return os; | |
262 | +} | |
263 | + | |
264 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | + | |
7 | + | |
8 | +namespace stim{ | |
9 | + | |
10 | + | |
11 | +/// A class designed to act as a 3D vector with CUDA compatibility | |
12 | +template<typename T> | |
13 | +class vec3{ | |
14 | + | |
15 | +protected: | |
16 | + T ptr[3]; | |
17 | + | |
18 | +public: | |
19 | + | |
20 | + CUDA_CALLABLE vec3(){} | |
21 | + | |
22 | + CUDA_CALLABLE vec3(T v){ | |
23 | + ptr[0] = ptr[1] = ptr[2] = v; | |
24 | + } | |
25 | + | |
26 | + CUDA_CALLABLE vec3(T x, T y, T z){ | |
27 | + ptr[0] = x; | |
28 | + ptr[1] = y; | |
29 | + ptr[2] = z; | |
30 | + } | |
31 | + | |
32 | + //copy constructor | |
33 | + CUDA_CALLABLE vec3( const vec3<T>& other){ | |
34 | + ptr[0] = other.ptr[0]; | |
35 | + ptr[1] = other.ptr[1]; | |
36 | + ptr[2] = other.ptr[2]; | |
37 | + } | |
38 | + | |
39 | + //access an element using an index | |
40 | + CUDA_CALLABLE T& operator[](size_t idx){ | |
41 | + return ptr[idx]; | |
42 | + } | |
43 | + | |
44 | + CUDA_CALLABLE T* data(){ | |
45 | + return ptr; | |
46 | + } | |
47 | + | |
48 | +/// Casting operator. Creates a new vector with a new type U. | |
49 | + template< typename U > | |
50 | + CUDA_CALLABLE operator vec3<U>(){ | |
51 | + vec3<U> result; | |
52 | + result.ptr[0] = (U)ptr[0]; | |
53 | + result.ptr[1] = (U)ptr[1]; | |
54 | + result.ptr[2] = (U)ptr[2]; | |
55 | + | |
56 | + return result; | |
57 | + } | |
58 | + | |
59 | + // computes the squared Euclidean length (useful for several operations where only >, =, or < matter) | |
60 | + CUDA_CALLABLE T len_sq() const{ | |
61 | + return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2]; | |
62 | + } | |
63 | + | |
64 | + /// computes the Euclidean length of the vector | |
65 | + CUDA_CALLABLE T len() const{ | |
66 | + return sqrt(len_sq()); | |
67 | + } | |
68 | + | |
69 | + | |
70 | + /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) | |
71 | + CUDA_CALLABLE vec3<T> cart2sph() const{ | |
72 | + vec3<T> sph; | |
73 | + sph.ptr[0] = len(); | |
74 | + sph.ptr[1] = std::atan2(ptr[1], ptr[0]); | |
75 | + if(sph.ptr[0] == 0) | |
76 | + sph.ptr[2] = 0; | |
77 | + else | |
78 | + sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]); | |
79 | + return sph; | |
80 | + } | |
81 | + | |
82 | + /// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z where theta = [0, 2*pi]) | |
83 | + CUDA_CALLABLE vec3<T> sph2cart() const{ | |
84 | + vec3<T> cart; | |
85 | + cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]); | |
86 | + cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]); | |
87 | + cart.ptr[2] = ptr[0] * std::cos(ptr[2]); | |
88 | + | |
89 | + return cart; | |
90 | + } | |
91 | + | |
92 | + /// Computes the normalized vector (where each coordinate is divided by the L2 norm) | |
93 | + CUDA_CALLABLE vec3<T> norm() const{ | |
94 | + vec3<T> result; | |
95 | + T l = len(); //compute the vector length | |
96 | + return (*this) / l; | |
97 | + } | |
98 | + | |
99 | + /// Computes the cross product of a 3-dimensional vector | |
100 | + CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{ | |
101 | + | |
102 | + vec3<T> result; | |
103 | + | |
104 | + result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]); | |
105 | + result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]); | |
106 | + result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]); | |
107 | + | |
108 | + return result; | |
109 | + } | |
110 | + | |
111 | + /// Compute the Euclidean inner (dot) product | |
112 | + CUDA_CALLABLE T dot(vec3<T> rhs) const{ | |
113 | + return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2]; | |
114 | + } | |
115 | + | |
116 | + /// Arithmetic addition operator | |
117 | + | |
118 | + /// @param rhs is the right-hand-side operator for the addition | |
119 | + CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{ | |
120 | + vec3<T> result; | |
121 | + result.ptr[0] = ptr[0] + rhs[0]; | |
122 | + result.ptr[1] = ptr[1] + rhs[1]; | |
123 | + result.ptr[2] = ptr[2] + rhs[2]; | |
124 | + return result; | |
125 | + } | |
126 | + | |
127 | + /// Arithmetic addition to a scalar | |
128 | + | |
129 | + /// @param rhs is the right-hand-side operator for the addition | |
130 | + CUDA_CALLABLE vec3<T> operator+(T rhs) const{ | |
131 | + vec3<T> result; | |
132 | + result.ptr[0] = ptr[0] + rhs; | |
133 | + result.ptr[1] = ptr[1] + rhs; | |
134 | + result.ptr[2] = ptr[2] + rhs; | |
135 | + return result; | |
136 | + } | |
137 | + | |
138 | + /// Arithmetic subtraction operator | |
139 | + | |
140 | + /// @param rhs is the right-hand-side operator for the subtraction | |
141 | + CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{ | |
142 | + vec3<T> result; | |
143 | + result.ptr[0] = ptr[0] - rhs[0]; | |
144 | + result.ptr[1] = ptr[1] - rhs[1]; | |
145 | + result.ptr[2] = ptr[2] - rhs[2]; | |
146 | + return result; | |
147 | + } | |
148 | + /// Arithmetic subtraction to a scalar | |
149 | + | |
150 | + /// @param rhs is the right-hand-side operator for the addition | |
151 | + CUDA_CALLABLE vec3<T> operator-(T rhs) const{ | |
152 | + vec3<T> result; | |
153 | + result.ptr[0] = ptr[0] - rhs; | |
154 | + result.ptr[1] = ptr[1] - rhs; | |
155 | + result.ptr[2] = ptr[2] - rhs; | |
156 | + return result; | |
157 | + } | |
158 | + | |
159 | + /// Arithmetic scalar multiplication operator | |
160 | + | |
161 | + /// @param rhs is the right-hand-side operator for the subtraction | |
162 | + CUDA_CALLABLE vec3<T> operator*(T rhs) const{ | |
163 | + vec3<T> result; | |
164 | + result.ptr[0] = ptr[0] * rhs; | |
165 | + result.ptr[1] = ptr[1] * rhs; | |
166 | + result.ptr[2] = ptr[2] * rhs; | |
167 | + return result; | |
168 | + } | |
169 | + | |
170 | + /// Arithmetic scalar division operator | |
171 | + | |
172 | + /// @param rhs is the right-hand-side operator for the subtraction | |
173 | + CUDA_CALLABLE vec3<T> operator/(T rhs) const{ | |
174 | + return (*this) * ((T)1.0/rhs); | |
175 | + } | |
176 | + | |
177 | + /// Multiplication by a scalar, followed by assignment | |
178 | + CUDA_CALLABLE vec3<T> operator*=(T rhs){ | |
179 | + ptr[0] = ptr[0] * rhs; | |
180 | + ptr[1] = ptr[1] * rhs; | |
181 | + ptr[2] = ptr[2] * rhs; | |
182 | + return *this; | |
183 | + } | |
184 | + | |
185 | + /// Addition and assignment | |
186 | + CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){ | |
187 | + ptr[0] = ptr[0] + rhs; | |
188 | + ptr[1] = ptr[1] + rhs; | |
189 | + ptr[2] = ptr[2] + rhs; | |
190 | + return *this; | |
191 | + } | |
192 | + | |
193 | + /// Assign a scalar to all values | |
194 | + CUDA_CALLABLE vec3<T> & operator=(T rhs){ | |
195 | + ptr[0] = ptr[0] = rhs; | |
196 | + ptr[1] = ptr[1] = rhs; | |
197 | + ptr[2] = ptr[2] = rhs; | |
198 | + return *this; | |
199 | + } | |
200 | + | |
201 | + /// Casting and assignment | |
202 | + template<typename Y> | |
203 | + CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){ | |
204 | + ptr[0] = (T)rhs.ptr[0]; | |
205 | + ptr[1] = (T)rhs.ptr[1]; | |
206 | + ptr[2] = (T)rhs.ptr[2]; | |
207 | + return *this; | |
208 | + } | |
209 | + | |
210 | + /// Unary minus (returns the negative of the vector) | |
211 | + CUDA_CALLABLE vec3<T> operator-() const{ | |
212 | + vec3<T> result; | |
213 | + result.ptr[0] = -ptr[0]; | |
214 | + result.ptr[1] = -ptr[1]; | |
215 | + result.ptr[2] = -ptr[2]; | |
216 | + return result; | |
217 | + } | |
218 | + | |
219 | +#ifndef __NVCC__ | |
220 | + /// Outputs the vector as a string | |
221 | + std::string str() const{ | |
222 | + std::stringstream ss; | |
223 | + | |
224 | + const size_t N = 3; | |
225 | + | |
226 | + ss<<"["; | |
227 | + for(size_t i=0; i<N; i++) | |
228 | + { | |
229 | + ss<<ptr[i]; | |
230 | + if(i != N-1) | |
231 | + ss<<", "; | |
232 | + } | |
233 | + ss<<"]"; | |
234 | + | |
235 | + return ss.str(); | |
236 | + } | |
237 | +#endif | |
238 | + | |
239 | + size_t size(){ return 3; } | |
240 | + | |
241 | + }; //end class vec3 | |
242 | +} //end namespace stim | |
243 | + | |
244 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
245 | +template <typename T> | |
246 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
247 | + return rhs * lhs; | |
248 | +} | |
249 | + | |
250 | +//stream operator | |
251 | +template<typename T> | |
252 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
253 | + os<<rhs.str(); | |
254 | + return os; | |
255 | +} | |
256 | + | |
257 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | + | |
7 | + | |
8 | +namespace stim{ | |
9 | + | |
10 | + | |
/// A class designed to act as a 3D vector with CUDA compatibility.
///
/// The three components are stored contiguously in ptr[] and are interpreted either as Cartesian
/// (x, y, z) or as spherical (r, theta, phi) coordinates, depending on the method that is called.
template<typename T>
class vec3{

protected:
    T ptr[3];                                       //component storage

public:

    /// Default constructor; the components are left uninitialized
    CUDA_CALLABLE vec3(){}

    /// Fill constructor: all three components are set to v.
    /// It is not explicit, so a scalar converts implicitly to a vec3.
    CUDA_CALLABLE vec3(T v){
        ptr[0] = ptr[1] = ptr[2] = v;
    }

    /// Construct a vector from its three components
    CUDA_CALLABLE vec3(T x, T y, T z){
        ptr[0] = x;
        ptr[1] = y;
        ptr[2] = z;
    }

    //copy constructor
    CUDA_CALLABLE vec3( const vec3<T>& other){
        ptr[0] = other.ptr[0];
        ptr[1] = other.ptr[1];
        ptr[2] = other.ptr[2];
    }

    //access an element using an index (the index is not range-checked)
    CUDA_CALLABLE T& operator[](size_t idx){
        return ptr[idx];
    }

    //read-only element access, so that const vectors can be indexed as well
    CUDA_CALLABLE const T& operator[](size_t idx) const{
        return ptr[idx];
    }

    /// Pointer to the first of the three contiguous components
    CUDA_CALLABLE T* data(){
        return ptr;
    }

    /// Read-only pointer to the first of the three contiguous components
    CUDA_CALLABLE const T* data() const{
        return ptr;
    }

    /// Casting operator. Creates a new vector with a new type U.
    template< typename U >
    CUDA_CALLABLE operator vec3<U>() const{
        //vec3<U> is a different class, so its protected storage cannot be written from here;
        //  the result is built through its public three-component constructor instead
        return vec3<U>(static_cast<U>(ptr[0]), static_cast<U>(ptr[1]), static_cast<U>(ptr[2]));
    }

    // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
    CUDA_CALLABLE T len_sq() const{
        return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
    }

    /// computes the Euclidean length of the vector
    CUDA_CALLABLE T len() const{
        return std::sqrt(len_sq());                 //std:: qualified like the other math calls in this class
    }


    /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi).
    /// theta is computed by std::atan2(y, x) and therefore lies in [-pi, pi]; phi = acos(z / r),
    /// and is set to 0 for the zero-length vector so that no division by zero occurs.
    CUDA_CALLABLE vec3<T> cart2sph() const{
        vec3<T> sph;
        sph.ptr[0] = len();
        sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
        if(sph.ptr[0] == 0)
            sph.ptr[2] = 0;
        else
            sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
        return sph;
    }

    /// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z),
    /// using element 1 as the azimuth and element 2 as the polar angle
    CUDA_CALLABLE vec3<T> sph2cart() const{
        vec3<T> cart;
        cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

        return cart;
    }

    /// Computes the normalized vector (where each coordinate is divided by the L2 norm).
    /// The zero vector is not special-cased; the result is whatever operator/ yields for a zero divisor.
    CUDA_CALLABLE vec3<T> norm() const{
        return (*this) / len();
    }

    /// Computes the cross product of this vector (left operand) with rhs (right operand)
    CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

        vec3<T> result;

        result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
        result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
        result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

        return result;
    }

    /// Compute the Euclidean inner (dot) product
    CUDA_CALLABLE T dot(vec3<T> rhs) const{
        return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
    }

    /// Arithmetic addition operator

    /// @param rhs is the right-hand-side operand for the addition
    CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs.ptr[0];
        result.ptr[1] = ptr[1] + rhs.ptr[1];
        result.ptr[2] = ptr[2] + rhs.ptr[2];
        return result;
    }

    /// Arithmetic addition to a scalar

    /// @param rhs is the scalar added to every component
    CUDA_CALLABLE vec3<T> operator+(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs;
        result.ptr[1] = ptr[1] + rhs;
        result.ptr[2] = ptr[2] + rhs;
        return result;
    }

    /// Arithmetic subtraction operator

    /// @param rhs is the right-hand-side operand for the subtraction
    CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs.ptr[0];
        result.ptr[1] = ptr[1] - rhs.ptr[1];
        result.ptr[2] = ptr[2] - rhs.ptr[2];
        return result;
    }

    /// Arithmetic subtraction of a scalar

    /// @param rhs is the scalar subtracted from every component
    CUDA_CALLABLE vec3<T> operator-(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs;
        result.ptr[1] = ptr[1] - rhs;
        result.ptr[2] = ptr[2] - rhs;
        return result;
    }

    /// Arithmetic scalar multiplication operator

    /// @param rhs is the scalar factor applied to every component
    CUDA_CALLABLE vec3<T> operator*(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] * rhs;
        result.ptr[1] = ptr[1] * rhs;
        result.ptr[2] = ptr[2] * rhs;
        return result;
    }

    /// Arithmetic scalar division operator

    /// @param rhs is the scalar divisor
    /// The division is carried out as a multiplication by the reciprocal (T)1.0/rhs, so for an
    /// integral T the reciprocal itself is computed with integer division.
    CUDA_CALLABLE vec3<T> operator/(T rhs) const{
        return (*this) * ((T)1.0/rhs);
    }

    /// Multiplication by a scalar, followed by assignment
    CUDA_CALLABLE vec3<T> operator*=(T rhs){
        ptr[0] = ptr[0] * rhs;
        ptr[1] = ptr[1] * rhs;
        ptr[2] = ptr[2] * rhs;
        return *this;
    }

    /// Addition and assignment (component by component)
    CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
        //each component receives the matching component of rhs
        ptr[0] = ptr[0] + rhs.ptr[0];
        ptr[1] = ptr[1] + rhs.ptr[1];
        ptr[2] = ptr[2] + rhs.ptr[2];
        return *this;
    }

    /// Assign a scalar to all values
    CUDA_CALLABLE vec3<T> & operator=(T rhs){
        ptr[0] = ptr[1] = ptr[2] = rhs;
        return *this;
    }

    /// Casting and assignment
    template<typename Y>
    CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
        //rhs belongs to a different class (vec3<Y>), so it is read through the public operator[]
        ptr[0] = (T)rhs[0];
        ptr[1] = (T)rhs[1];
        ptr[2] = (T)rhs[2];
        return *this;
    }

    /// Unary minus (returns the negative of the vector)
    CUDA_CALLABLE vec3<T> operator-() const{
        vec3<T> result;
        result.ptr[0] = -ptr[0];
        result.ptr[1] = -ptr[1];
        result.ptr[2] = -ptr[2];
        return result;
    }

//#ifndef __NVCC__
    /// Outputs the vector as a string of the form "[a, b, c]"
    std::string str() const{
        std::stringstream ss;

        const size_t N = 3;

        ss<<"[";
        for(size_t i=0; i<N; i++)
        {
            ss<<ptr[i];
            if(i != N-1)
                ss<<", ";
        }
        ss<<"]";

        return ss.str();
    }
//#endif

    /// Number of components (always 3)
    size_t size() const{ return 3; }

    };                                              //end class vec3
242 | +} //end namespace stim | |
243 | + | |
244 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
245 | +template <typename T> | |
246 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
247 | + return rhs * lhs; | |
248 | +} | |
249 | + | |
250 | +//stream operator | |
251 | +template<typename T> | |
252 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
253 | + os<<rhs.str(); | |
254 | + return os; | |
255 | +} | |
256 | + | |
257 | +#endif | ... | ... |
1 | +#ifndef STIM_VEC3_H | |
2 | +#define STIM_VEC3_H | |
3 | + | |
4 | + | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | +#include <cmath> | |
7 | + | |
8 | + | |
9 | +namespace stim{ | |
10 | + | |
11 | + | |
/// A class designed to act as a 3D vector with CUDA compatibility.
///
/// The three components are stored contiguously in ptr[] and are interpreted either as Cartesian
/// (x, y, z) or as spherical (r, theta, phi) coordinates, depending on the method that is called.
template<typename T>
class vec3{

protected:
    T ptr[3];                                       //component storage

public:

    /// Default constructor; the components are left uninitialized
    CUDA_CALLABLE vec3(){}

    /// Fill constructor: all three components are set to v.
    /// It is not explicit, so a scalar converts implicitly to a vec3.
    CUDA_CALLABLE vec3(T v){
        ptr[0] = ptr[1] = ptr[2] = v;
    }

    /// Construct a vector from its three components
    CUDA_CALLABLE vec3(T x, T y, T z){
        ptr[0] = x;
        ptr[1] = y;
        ptr[2] = z;
    }

    //copy constructor
    CUDA_CALLABLE vec3( const vec3<T>& other){
        ptr[0] = other.ptr[0];
        ptr[1] = other.ptr[1];
        ptr[2] = other.ptr[2];
    }

    //access an element using an index (the index is not range-checked)
    CUDA_CALLABLE T& operator[](size_t idx){
        return ptr[idx];
    }

    //read-only element access, so that const vectors can be indexed as well
    CUDA_CALLABLE const T& operator[](size_t idx) const{
        return ptr[idx];
    }

    /// Pointer to the first of the three contiguous components
    CUDA_CALLABLE T* data(){
        return ptr;
    }

    /// Read-only pointer to the first of the three contiguous components
    CUDA_CALLABLE const T* data() const{
        return ptr;
    }

    /// Casting operator. Creates a new vector with a new type U.
    template< typename U >
    CUDA_CALLABLE operator vec3<U>() const{
        //vec3<U> is a different class, so its protected storage cannot be written from here;
        //  the result is built through its public three-component constructor instead
        return vec3<U>(static_cast<U>(ptr[0]), static_cast<U>(ptr[1]), static_cast<U>(ptr[2]));
    }

    // computes the squared Euclidean length (useful for several operations where only >, =, or < matter)
    CUDA_CALLABLE T len_sq() const{
        return ptr[0] * ptr[0] + ptr[1] * ptr[1] + ptr[2] * ptr[2];
    }

    /// computes the Euclidean length of the vector
    CUDA_CALLABLE T len() const{
        return std::sqrt(len_sq());                 //std:: qualified like the other math calls in this class
    }


    /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi).
    /// theta is computed by std::atan2(y, x) and therefore lies in [-pi, pi]; phi = acos(z / r),
    /// and is set to 0 for the zero-length vector so that no division by zero occurs.
    CUDA_CALLABLE vec3<T> cart2sph() const{
        vec3<T> sph;
        sph.ptr[0] = len();
        sph.ptr[1] = std::atan2(ptr[1], ptr[0]);
        if(sph.ptr[0] == 0)
            sph.ptr[2] = 0;
        else
            sph.ptr[2] = std::acos(ptr[2] / sph.ptr[0]);
        return sph;
    }

    /// Convert the vector from spherical to cartesian coordinates (r, theta, phi -> x, y, z),
    /// using element 1 as the azimuth and element 2 as the polar angle
    CUDA_CALLABLE vec3<T> sph2cart() const{
        vec3<T> cart;
        cart.ptr[0] = ptr[0] * std::cos(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[1] = ptr[0] * std::sin(ptr[1]) * std::sin(ptr[2]);
        cart.ptr[2] = ptr[0] * std::cos(ptr[2]);

        return cart;
    }

    /// Computes the normalized vector (where each coordinate is divided by the L2 norm).
    /// The zero vector is not special-cased; the result is whatever operator/ yields for a zero divisor.
    CUDA_CALLABLE vec3<T> norm() const{
        return (*this) / len();
    }

    /// Computes the cross product of this vector (left operand) with rhs (right operand)
    CUDA_CALLABLE vec3<T> cross(const vec3<T> rhs) const{

        vec3<T> result;

        result[0] = (ptr[1] * rhs.ptr[2] - ptr[2] * rhs.ptr[1]);
        result[1] = (ptr[2] * rhs.ptr[0] - ptr[0] * rhs.ptr[2]);
        result[2] = (ptr[0] * rhs.ptr[1] - ptr[1] * rhs.ptr[0]);

        return result;
    }

    /// Compute the Euclidean inner (dot) product
    CUDA_CALLABLE T dot(vec3<T> rhs) const{
        return ptr[0] * rhs.ptr[0] + ptr[1] * rhs.ptr[1] + ptr[2] * rhs.ptr[2];
    }

    /// Arithmetic addition operator

    /// @param rhs is the right-hand-side operand for the addition
    CUDA_CALLABLE vec3<T> operator+(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs.ptr[0];
        result.ptr[1] = ptr[1] + rhs.ptr[1];
        result.ptr[2] = ptr[2] + rhs.ptr[2];
        return result;
    }

    /// Arithmetic addition to a scalar

    /// @param rhs is the scalar added to every component
    CUDA_CALLABLE vec3<T> operator+(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] + rhs;
        result.ptr[1] = ptr[1] + rhs;
        result.ptr[2] = ptr[2] + rhs;
        return result;
    }

    /// Arithmetic subtraction operator

    /// @param rhs is the right-hand-side operand for the subtraction
    CUDA_CALLABLE vec3<T> operator-(vec3<T> rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs.ptr[0];
        result.ptr[1] = ptr[1] - rhs.ptr[1];
        result.ptr[2] = ptr[2] - rhs.ptr[2];
        return result;
    }

    /// Arithmetic subtraction of a scalar

    /// @param rhs is the scalar subtracted from every component
    CUDA_CALLABLE vec3<T> operator-(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] - rhs;
        result.ptr[1] = ptr[1] - rhs;
        result.ptr[2] = ptr[2] - rhs;
        return result;
    }

    /// Arithmetic scalar multiplication operator

    /// @param rhs is the scalar factor applied to every component
    CUDA_CALLABLE vec3<T> operator*(T rhs) const{
        vec3<T> result;
        result.ptr[0] = ptr[0] * rhs;
        result.ptr[1] = ptr[1] * rhs;
        result.ptr[2] = ptr[2] * rhs;
        return result;
    }

    /// Arithmetic scalar division operator

    /// @param rhs is the scalar divisor
    /// The division is carried out as a multiplication by the reciprocal (T)1.0/rhs, so for an
    /// integral T the reciprocal itself is computed with integer division.
    CUDA_CALLABLE vec3<T> operator/(T rhs) const{
        return (*this) * ((T)1.0/rhs);
    }

    /// Multiplication by a scalar, followed by assignment
    CUDA_CALLABLE vec3<T> operator*=(T rhs){
        ptr[0] = ptr[0] * rhs;
        ptr[1] = ptr[1] * rhs;
        ptr[2] = ptr[2] * rhs;
        return *this;
    }

    /// Addition and assignment (component by component)
    CUDA_CALLABLE vec3<T> operator+=(vec3<T> rhs){
        //each component receives the matching component of rhs
        ptr[0] = ptr[0] + rhs.ptr[0];
        ptr[1] = ptr[1] + rhs.ptr[1];
        ptr[2] = ptr[2] + rhs.ptr[2];
        return *this;
    }

    /// Assign a scalar to all values
    CUDA_CALLABLE vec3<T> & operator=(T rhs){
        ptr[0] = ptr[1] = ptr[2] = rhs;
        return *this;
    }

    /// Casting and assignment
    template<typename Y>
    CUDA_CALLABLE vec3<T> & operator=(vec3<Y> rhs){
        //rhs belongs to a different class (vec3<Y>), so it is read through the public operator[]
        ptr[0] = (T)rhs[0];
        ptr[1] = (T)rhs[1];
        ptr[2] = (T)rhs[2];
        return *this;
    }

    /// Unary minus (returns the negative of the vector)
    CUDA_CALLABLE vec3<T> operator-() const{
        vec3<T> result;
        result.ptr[0] = -ptr[0];
        result.ptr[1] = -ptr[1];
        result.ptr[2] = -ptr[2];
        return result;
    }

    /// Outputs the vector as a string of the form "[a, b, c]"
    std::string str() const{
        std::stringstream ss;

        const size_t N = 3;

        ss<<"[";
        for(size_t i=0; i<N; i++)
        {
            ss<<ptr[i];
            if(i != N-1)
                ss<<", ";
        }
        ss<<"]";

        return ss.str();
    }

    /// Number of components (always 3)
    size_t size() const{ return 3; }

    };                                              //end class vec3
241 | +} //end namespace stim | |
242 | + | |
243 | +/// Multiply a vector by a constant when the vector is on the right hand side | |
244 | +template <typename T> | |
245 | +stim::vec3<T> operator*(T lhs, stim::vec3<T> rhs){ | |
246 | + return rhs * lhs; | |
247 | +} | |
248 | + | |
249 | +//stream operator | |
250 | +template<typename T> | |
251 | +std::ostream& operator<<(std::ostream& os, stim::vec3<T> const& rhs){ | |
252 | + os<<rhs.str(); | |
253 | + return os; | |
254 | +} | |
255 | + | |
256 | +#endif | ... | ... |
stim/math/vector.h
... | ... | @@ -5,6 +5,7 @@ |
5 | 5 | #include <cmath> |
6 | 6 | #include <sstream> |
7 | 7 | #include <vector> |
8 | +#include <algorithm> | |
8 | 9 | |
9 | 10 | #include <stim/cuda/cudatools/callable.h> |
10 | 11 | #include <stim/math/vec3.h> |
... | ... | @@ -74,11 +75,11 @@ struct vec : public std::vector<T> |
74 | 75 | at(i) = other[i]; |
75 | 76 | } |
76 | 77 | } |
77 | - | |
78 | + | |
78 | 79 | // vec( vec3<T>& other){ |
79 | 80 | // resize(3); //resize the current vector to match the copy |
80 | 81 | // for(size_t i=0; i<3; i++){ //copy each element |
81 | -// at(i) = other[i]; | |
82 | +// at(i) = other[i]; | |
82 | 83 | // } |
83 | 84 | // } |
84 | 85 | |
... | ... | @@ -139,16 +140,16 @@ struct vec : public std::vector<T> |
139 | 140 | |
140 | 141 | } |
141 | 142 | |
142 | - | |
143 | - vec<T> cyl2cart() const | |
144 | - { | |
145 | - vec<T> cyl; | |
146 | - cyl.push_back(at(0)*std::sin(at(1))); | |
147 | - cyl.push_back(at(0)*std::cos(at(1))); | |
148 | - cyl.push_back(at(2)); | |
149 | - return(cyl); | |
150 | - | |
151 | - } | |
143 | + | |
144 | + vec<T> cyl2cart() const | |
145 | + { | |
146 | + vec<T> cyl; | |
147 | + cyl.push_back(at(0)*std::sin(at(1))); | |
148 | + cyl.push_back(at(0)*std::cos(at(1))); | |
149 | + cyl.push_back(at(2)); | |
150 | + return(cyl); | |
151 | + | |
152 | + } | |
152 | 153 | /// Convert the vector from cartesian to spherical coordinates (x, y, z -> r, theta, phi where theta = [0, 2*pi]) |
153 | 154 | vec<T> cart2sph() const |
154 | 155 | { |
... | ... | @@ -335,16 +336,16 @@ struct vec : public std::vector<T> |
335 | 336 | return *this; |
336 | 337 | } |
337 | 338 | |
338 | - /// Cast to a vec3 | |
339 | - operator stim::vec3<T>(){ | |
340 | - stim::vec3<T> r; | |
341 | - size_t N = std::min<size_t>(size(), 3); | |
342 | - for(size_t i = 0; i < N; i++) | |
343 | - r[i] = at(i); | |
344 | - return r; | |
345 | - } | |
346 | - | |
347 | - | |
339 | + /// Cast to a vec3 | |
340 | + operator stim::vec3<T>(){ | |
341 | + stim::vec3<T> r; | |
342 | + size_t N = std::min(size(), (size_t)3); | |
343 | + for(size_t i = 0; i < N; i++) | |
344 | + r[i] = at(i); | |
345 | + return r; | |
346 | + } | |
347 | + | |
348 | + | |
348 | 349 | /// Casting and assignment |
349 | 350 | template<typename Y> |
350 | 351 | vec<T> & operator=(vec<Y> rhs){ |
... | ... | @@ -355,16 +356,16 @@ struct vec : public std::vector<T> |
355 | 356 | at(i) = rhs[i]; |
356 | 357 | return *this; |
357 | 358 | } |
358 | - | |
359 | - /// Assign a vec = vec3 | |
360 | - template<typename Y> | |
361 | - vec<T> & operator=(vec3<Y> rhs) | |
362 | - { | |
363 | - resize(3); | |
364 | - for(size_t i=0; i<3; i++) | |
365 | - at(i) = rhs[i]; | |
366 | - return *this; | |
367 | - } | |
359 | + | |
360 | + /// Assign a vec = vec3 | |
361 | + template<typename Y> | |
362 | + vec<T> & operator=(vec3<Y> rhs) | |
363 | + { | |
364 | + resize(3); | |
365 | + for(size_t i=0; i<3; i++) | |
366 | + at(i) = rhs[i]; | |
367 | + return *this; | |
368 | + } | |
368 | 369 | |
369 | 370 | /// Unary minus (returns the negative of the vector) |
370 | 371 | vec<T> operator-() const{ | ... | ... |
stim/parser/arguments.h
... | ... | @@ -13,6 +13,44 @@ |
13 | 13 | #include <Windows.h> |
14 | 14 | #endif |
15 | 15 | |
16 | +/**The arglist class implements command line arguments. | |
17 | + Example: | |
18 | + | |
19 | + 1) Create an arglist instance: | |
20 | + | |
21 | + stim::arglist args; | |
22 | + | |
23 | + 2) Add arguments: | |
24 | + | |
25 | + args.add("help", "prints this help"); | |
26 | + args.add("foo", "foo takes a single integer value", "", "[intval]"); | |
27 | + args.add("bar", "bar takes two floating point values", "", "[value1], [value2]"); | |
28 | + | |
29 | + 3) Parse the command line: | |
30 | + | |
31 | + args.parse(argc, argv); | |
32 | + | |
33 | + 4) You generally want to immediately test for help and output available arguments: | |
34 | + | |
35 | + if(args["help"].is_set()) | |
36 | + std::cout<<args.str(); | |
37 | + | |
38 | + | |
39 | + | |
40 | + 5) Retrieve values: | |
41 | + | |
42 | + int foo; | |
43 | + float bar1, bar2; | |
44 | + if(args["foo"]) | |
45 | + foo = args["foo"].as_int(); | |
46 | + if(args["bar"]){ | |
47 | + bar1 = args["bar"].as_float(0); | |
48 | + bar2 = args["bar"].as_float(1); | |
49 | + } | |
50 | + | |
51 | + | |
52 | +**/ | |
53 | + | |
16 | 54 | namespace stim{ |
17 | 55 | |
18 | 56 | class cmd_option |
... | ... | @@ -258,10 +296,12 @@ namespace stim{ |
258 | 296 | flag = true; |
259 | 297 | } |
260 | 298 | |
261 | - bool is_set() | |
262 | - { | |
299 | + bool is_set() const{ | |
263 | 300 | return flag; |
264 | 301 | } |
302 | + operator bool() const{ | |
303 | + return is_set(); | |
304 | + } | |
265 | 305 | |
266 | 306 | }; |
267 | 307 | |
... | ... | @@ -271,43 +311,7 @@ namespace stim{ |
271 | 311 | size_t index; |
272 | 312 | }; |
273 | 313 | |
274 | - /**The arglist class implements command line arguments. | |
275 | - Example: | |
276 | - | |
277 | - 1) Create an arglist instance: | |
278 | - | |
279 | - stim::arglist args; | |
280 | - | |
281 | - 2) Add arguments: | |
282 | 314 | |
283 | - args.add("help", "prints this help"); | |
284 | - args.add("foo", "foo takes a single integer value", "", "[intval]"); | |
285 | - args.add("bar", "bar takes two floating point values", "", "[value1], [value2]"); | |
286 | - | |
287 | - 3) Parse the command line: | |
288 | - | |
289 | - args.parse(argc, argv); | |
290 | - | |
291 | - 4) You generally want to immediately test for help and output available arguments: | |
292 | - | |
293 | - if(args["help"].is_set()) | |
294 | - std::cout<<args.str(); | |
295 | - | |
296 | - | |
297 | - | |
298 | - 5) Retrieve values: | |
299 | - | |
300 | - int foo; | |
301 | - float bar1, bar2; | |
302 | - if(args["foo"]) | |
303 | - foo = args["foo"].as_int(); | |
304 | - if(args["bar"]){ | |
305 | - bar1 = args["bar"].as_float(0); | |
306 | - bar2 = args["bar"].as_float(1); | |
307 | - } | |
308 | - | |
309 | - | |
310 | - **/ | |
311 | 315 | |
312 | 316 | class arglist |
313 | 317 | { |
... | ... | @@ -528,21 +532,21 @@ namespace stim{ |
528 | 532 | std::vector<std::string> arg_vector(){ |
529 | 533 | return args; |
530 | 534 | } |
531 | - ///Returns an object describing the argument | |
532 | - | |
533 | - /// @param _name is the name of the requested argument | |
534 | - cmd_option operator[](std::string _name){ | |
535 | - std::vector<cmd_option>::iterator it; | |
536 | - it = find(opts.begin(), opts.end(), _name);// - opts.begin(); | |
535 | + ///Returns an object describing the argument | |
537 | 536 | |
538 | - if(it == opts.end()){ | |
539 | - std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | |
540 | - exit(1); | |
541 | - } | |
537 | + /// @param _name is the name of the requested argument | |
538 | + cmd_option operator[](std::string _name){ | |
539 | + std::vector<cmd_option>::iterator it; | |
540 | + it = find(opts.begin(), opts.end(), _name);// - opts.begin(); | |
542 | 541 | |
543 | - return *it; | |
542 | + if(it == opts.end()){ | |
543 | + std::cout<<"ERROR - Unspecified parameter name: "<<_name<<std::endl; | |
544 | + exit(1); | |
544 | 545 | } |
545 | 546 | |
547 | + return *it; | |
548 | + } | |
549 | + | |
546 | 550 | |
547 | 551 | }; |
548 | 552 | ... | ... |
1 | +/// Reconstruct a 1D function from a 2D symmetric function. This function takes a 2D image f(x,y) as input and | |
2 | +/// builds a 1D function f(r) where r = sqrt(x^2 + y^2) to approximate this 2D function. | |
3 | +/// This is useful for several applications, such as: | |
4 | +/// 1) Calculating a 1D function from a noisy 2D image, when you know the 2D image is supposed to be symmetric | |
5 | +/// 2) Calculating the average value for every r = sqrt(x^2 + y^2) | |
6 | + | |
/// Given a set of function samples equally spaced by dx, calculate the lower of the two samples that bracket x
/// and the proximity ratio alpha.
/// This can be used to linearly interpolate between an array of equally spaced values. Given the query value x, the
/// interpolated value can be calculated as r = values[sample] * alpha + values[sample + 1] * (1 - alpha)
/// @param sample receives the lowest bin closest to the query point x, i.e. floor(x / dx); it may have an
///         integral type (such as size_t) that differs from the type of the weights
/// @param alpha receives the weight of bin [sample]: 1 when x lies exactly on that bin, approaching 0 as x nears [sample + 1]
/// @param dx is the spacing between values
/// @param x is the query point (assumed non-negative when sample has an unsigned type)
template<typename S, typename T>
void lerp_alpha(S& sample, T& alpha, T dx, T x){
    const T bin = std::floor(x / dx);               //index of the lower bracketing sample, kept in the weight type
    sample = static_cast<S>(bin);
    alpha = 1 - (x - (bin * dx)) / dx;              //the earlier code referred to an undeclared variable here
}
19 | + | |
20 | +/// This function assumes that the input image is square, that the # of samples are odd, and that r=0 is at the center | |
21 | +/// @param fr is an array of X elements that will store the reconstructed function | |
22 | +/// @param dr is the spacing (in pixels) between samples in fr | |
23 | +template<typename T> | |
24 | +void cpu_func1_from_symmetric2(T* fr, T& dr, T* fxy, size_t X){ | |
25 | + | |
26 | + if(X%2 == 0){ //the 2D function must be odd (a sample must be available for r=0) | |
27 | + std::err<<"Error, X = "<<X<<" must be odd."<<std::endl; | |
28 | + exit(1); | |
29 | + } | |
30 | + size_t C = X/2+1; //calculate the center pixel coordinate | |
31 | + size_t N = C * C; //number of values in the folded function | |
32 | + | |
33 | + // The first step is to fold the function 8 times to take advantage of symmetry in the grid | |
34 | + T* folded = (T*) malloc(sizeof(T) * N ); //allocate space for the folded function | |
35 | + memset(folded, 0, sizeof(T) * N); | |
36 | + char* count = (char*) malloc( N ); //allocate space for a counter for the folded function | |
37 | + memset(count, 0, sizeof(T) * N); | |
38 | + size_t xi, yi; //indices into the image f(xi, yi) | |
39 | + size_t xii, yii; //indices into the folded image | |
40 | + T v; //register to store the value at point (xi, yi) | |
41 | + for(xi = 0; xi < X; xi++){ | |
42 | + for(yi = 0; yi < X; yi++){ | |
43 | + v = fxy[yi * X + xi]; //retrieve f(x, y) | |
44 | + | |
45 | + xii = xi; | |
46 | + yii = yi; //initialize the indices into the folded image | |
47 | + | |
48 | + //fold the function along the x and y axes | |
49 | + if(xi > C) xii = 2 * C - xi - 1; //calculate the folded index of x | |
50 | + if(yi > C) yii = 2 * C - yi - 1; //calculate the folded index of y | |
51 | + | |
52 | + if(xii < yii) std::swap<T>(xii, yii); //fold the function again along the 45-degree line | |
53 | + | |
54 | + folded[yii * C + xii] += v; //add the value to the folded function | |
55 | + count[yii * C + xii] += 1; //add a counter to the counter table | |
56 | + } | |
57 | + } | |
58 | + | |
59 | + //divide out the counter to correct the folded function | |
60 | + for(size_t i = 0; i < N){ | |
61 | + folded[i] /= (T)count[i]; //divide out the counter | |
62 | + } | |
63 | + | |
64 | + T max_r = sqrt(X * X + Y * Y); //calculate the maximum r value, which will be along the image diagonal | |
65 | + T dr = max_r / (X - 1); //spacing between samples in the output function f(r) | |
66 | + | |
67 | + T* fA = (T*) malloc( sizeof(T) * X); //allocate space for a counter function storing alpha weights | |
68 | + memset(fA, 0, sizeof(T) * X); //zero out the alpha array | |
69 | + memset(fr, 0, sizeof(T) * X); //zero out the output function | |
70 | + | |
71 | + T r; //register to store the value of r at each point | |
72 | + size_t sample; | |
73 | + T alpha; | |
74 | + for(xi = 0; xi < C; xi++){ | |
75 | + for(yi = 0; yi < xi; yi++){ | |
76 | + r = sqrt(xi*xi + yi*yi); //calculate the value of r for the current (x, y) | |
77 | + lerp_alpha(sample, alpha, dr, r); //calculate the lowest nearby sample index and the associated alpha weight | |
78 | + fr[sample] += folded[yi * C + xi] * alpha; //sum the weighted value from the folded function | |
79 | + fA[sample] += alpha; //sum the weight | |
80 | + | |
81 | + if(sample < X - 1){ //if we aren't dealing with the last bin | |
82 | + fr[sample + 1] += folded[yi * C + xi] * (1.0 - alpha); //calculate the weighted value for the second point | |
83 | + fA[sample + 1] += 1 - alpha; //add the second alpha value | |
84 | + } | |
85 | + } | |
86 | + } | |
87 | + | |
88 | + //divide out the alpha values | |
89 | + for(size_t i = 0; i < X; i++) | |
90 | + fr[i] /= fA[i]; | |
91 | + | |
92 | + //free allocated memory | |
93 | + free(folded); | |
94 | + free(count); | |
95 | + free(fA); | |
96 | +} | |
0 | 97 | \ No newline at end of file | ... | ... |
1 | +// the per-thread CUDA search stack (stack_size) currently holds 50 entries; increase it if you intend to build a deeper tree | |
2 | +// data should be stored point by point, with the D coordinates of each point stored contiguously: | |
3 | +// x0,y0,z0,... | |
4 | +// x1,y1,z1,... | |
5 | +// .................... | |
6 | +// .................... | |
7 | + | |
8 | +#ifndef KDTREE_H | |
9 | +#define KDTREE_H | |
10 | +#define stack_size 50 | |
11 | + | |
12 | +#include "device_launch_parameters.h" | |
13 | +#include <cuda.h> | |
14 | +#include <cuda_runtime_api.h> | |
15 | +#include "cuda_runtime.h" | |
16 | +#include <vector> | |
17 | +#include <cstring> | |
18 | +#include <float.h> | |
19 | +#include <iostream> | |
20 | +#include <algorithm> | |
21 | +#include <stim/cuda/cudatools/error.h> | |
22 | +#include <stim/visualization/aabbn.h> | |
23 | + | |
24 | +namespace stim { | |
namespace kdtree {
	/// A single input point with D coordinates.
	/// @tparam T is the coordinate type (float or double)
	/// @tparam D is the dimension of the point
	template<typename T, int D>
	struct point {
		T dim[D];								// coordinates of the point, one per dimension
	};

	/// A node of the pointer-based KD-tree built on the CPU.
	/// Every field is given a defined default so that a freshly constructed node is safe to inspect.
	/// @tparam T is the type of the splitting value (float or double)
	template<typename T>
	class kdnode {
	public:
		kdnode() {								// constructor for initializing a kdnode
			parent = NULL;						// set every node's parent, left and right kdnode pointers to NULL
			left = NULL;
			right = NULL;
			idx = -1;							// the node has no index until the tree assigns one
			parent_idx = -1;					// set parent, left and right node indices to default -1
			left_idx = -1;
			right_idx = -1;
			split_value = -1;					// set split_value to default -1
			level = 0;							// tree level defaults to the root level
		}
		int idx;								// index of current node
		int parent_idx, left_idx, right_idx;	// index of parent, left and right nodes (-1 when absent)
		kdnode *parent, *left, *right;			// parent, left and right kdnodes (NULL when absent)
		T split_value;							// splitting value of current node
		std::vector <size_t> indices;			// indices of the points that the current node holds
		size_t level;							// tree level of current node
	};
}	// end of namespace kdtree
51 | + | |
52 | + template <typename T, int D = 3> // set dimension of data to default 3 | |
53 | + class cpu_kdtree { | |
54 | + protected: | |
55 | + int current_axis; // current judging axis | |
56 | + int n_id; // store the total number of nodes | |
57 | + std::vector < typename kdtree::point<T, D> > *tmp_points; // transfer or temperary points | |
58 | + std::vector < typename kdtree::point<T, D> > cpu_tmp_points; // for cpu searching | |
59 | + kdtree::kdnode<T> *root; // root node | |
60 | + static cpu_kdtree<T, D> *cur_tree_ptr; | |
61 | + public: | |
62 | + cpu_kdtree() { // constructor for creating a cpu_kdtree | |
63 | + cur_tree_ptr = this; // create a class pointer points to the current class value | |
64 | + n_id = 0; // set total number of points to default 0 | |
65 | + } | |
66 | + ~cpu_kdtree() { // destructor of cpu_kdtree | |
67 | + std::vector <kdtree::kdnode<T>*> next_nodes; | |
68 | + next_nodes.push_back(root); | |
69 | + while (next_nodes.size()) { | |
70 | + std::vector <kdtree::kdnode<T>*> next_search_nodes; | |
71 | + while (next_nodes.size()) { | |
72 | + kdtree::kdnode<T> *cur = next_nodes.back(); | |
73 | + next_nodes.pop_back(); | |
74 | + if (cur->left) | |
75 | + next_search_nodes.push_back(cur->left); | |
76 | + if (cur->right) | |
77 | + next_search_nodes.push_back(cur->right); | |
78 | + delete cur; | |
79 | + } | |
80 | + next_nodes = next_search_nodes; | |
81 | + } | |
82 | + root = NULL; | |
83 | + } | |
84 | + void cpu_create(std::vector < typename kdtree::point<T, D> > &reference_points, size_t max_levels) { | |
85 | + tmp_points = &reference_points; | |
86 | + root = new kdtree::kdnode<T>(); // initializing the root node | |
87 | + root->idx = n_id++; // the index of root is 0 | |
88 | + root->level = 0; // tree level begins at 0 | |
89 | + root->indices.resize(reference_points.size()); // get the number of points | |
90 | + for (size_t i = 0; i < reference_points.size(); i++) { | |
91 | + root->indices[i] = i; // set indices of input points | |
92 | + } | |
93 | + std::vector <kdtree::kdnode<T>*> next_nodes; // next nodes | |
94 | + next_nodes.push_back(root); // push back the root node | |
95 | + while (next_nodes.size()) { | |
96 | + std::vector <kdtree::kdnode<T>*> next_search_nodes; // next search nodes | |
97 | + while (next_nodes.size()) { // two same WHILE is because we need to make a new vector to store nodes for search | |
98 | + kdtree::kdnode<T> *current_node = next_nodes.back(); // handle node one by one (right first) | |
99 | + next_nodes.pop_back(); // pop out current node in order to store next round of nodes | |
100 | + if (current_node->level < max_levels) { | |
101 | + if (current_node->indices.size() > 1) { // split if the nonleaf node contains more than one point | |
102 | + kdtree::kdnode<T> *left = new kdtree::kdnode<T>(); | |
103 | + kdtree::kdnode<T> *right = new kdtree::kdnode<T>(); | |
104 | + left->idx = n_id++; // set the index of current node's left node | |
105 | + right->idx = n_id++; | |
106 | + split(current_node, left, right); // split left and right and determine a node | |
107 | + std::vector <size_t> temp; // empty vecters of int | |
108 | + //temp.resize(current_node->indices.size()); | |
109 | + current_node->indices.swap(temp); // clean up current node's indices | |
110 | + current_node->left = left; | |
111 | + current_node->right = right; | |
112 | + current_node->left_idx = left->idx; | |
113 | + current_node->right_idx = right->idx; | |
114 | + if (right->indices.size()) | |
115 | + next_search_nodes.push_back(right); // left pop out first | |
116 | + if (left->indices.size()) | |
117 | + next_search_nodes.push_back(left); | |
118 | + } | |
119 | + } | |
120 | + } | |
121 | + next_nodes = next_search_nodes; // go deeper within the tree | |
122 | + } | |
123 | + } | |
124 | + static bool sort_points(const size_t a, const size_t b) { // create functor for std::sort | |
125 | + std::vector < typename kdtree::point<T, D> > &pts = *cur_tree_ptr->tmp_points; // put cur_tree_ptr to current input points' pointer | |
126 | + return pts[a].dim[cur_tree_ptr->current_axis] < pts[b].dim[cur_tree_ptr->current_axis]; | |
127 | + } | |
128 | + void split(kdtree::kdnode<T> *cur, kdtree::kdnode<T> *left, kdtree::kdnode<T> *right) { | |
129 | + std::vector < typename kdtree::point<T, D> > &pts = *tmp_points; | |
130 | + current_axis = cur->level % D; // indicate the judicative dimension or axis | |
131 | + std::sort(cur->indices.begin(), cur->indices.end(), sort_points); // using SortPoints as comparison function to sort the data | |
132 | + size_t mid_value = cur->indices[cur->indices.size() / 2]; // odd in the mid_value, even take the floor | |
133 | + cur->split_value = pts[mid_value].dim[current_axis]; // get the parent node | |
134 | + left->parent = cur; // set the parent of the next search nodes to current node | |
135 | + right->parent = cur; | |
136 | + left->level = cur->level + 1; // level + 1 | |
137 | + right->level = cur->level + 1; | |
138 | + left->parent_idx = cur->idx; // set its parent node's index | |
139 | + right->parent_idx = cur->idx; | |
140 | + for (size_t i = 0; i < cur->indices.size(); i++) { // split into left and right half-space one by one | |
141 | + size_t idx = cur->indices[i]; | |
142 | + if (pts[idx].dim[current_axis] < cur->split_value) | |
143 | + left->indices.push_back(idx); | |
144 | + else | |
145 | + right->indices.push_back(idx); | |
146 | + } | |
147 | + } | |
148 | + void create(T *h_reference_points, size_t reference_count, size_t max_levels) { | |
149 | + std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way | |
150 | + for (size_t j = 0; j < reference_count; j++) | |
151 | + for (size_t i = 0; i < D; i++) | |
152 | + reference_points[j].dim[i] = h_reference_points[j * D + i]; | |
153 | + cpu_create(reference_points, max_levels); | |
154 | + cpu_tmp_points = *tmp_points; | |
155 | + } | |
156 | + int get_num_nodes() const { // get the total number of nodes | |
157 | + return n_id; | |
158 | + } | |
159 | + kdtree::kdnode<T>* get_root() const { // get the root node of tree | |
160 | + return root; | |
161 | + } | |
162 | + T cpu_distance(const kdtree::point<T, D> &a, const kdtree::point<T, D> &b) { | |
163 | + T distance = 0; | |
164 | + | |
165 | + for (size_t i = 0; i < D; i++) { | |
166 | + T d = a.dim[i] - b.dim[i]; | |
167 | + distance += d*d; | |
168 | + } | |
169 | + return distance; | |
170 | + } | |
171 | + void cpu_search_at_node(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, size_t *index, T *distance, kdtree::kdnode<T> **node) { | |
172 | + T best_distance = FLT_MAX; // initialize the best distance to max of floating point | |
173 | + size_t best_index = 0; | |
174 | + std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points; | |
175 | + while (true) { | |
176 | + size_t split_axis = cur->level % D; | |
177 | + if (cur->left == NULL) { // risky but acceptable, same goes for right because left and right are in same pace | |
178 | + *node = cur; // pointer points to a pointer | |
179 | + for (size_t i = 0; i < cur->indices.size(); i++) { | |
180 | + size_t idx = cur->indices[i]; | |
181 | + T d = cpu_distance(query, pts[idx]); // compute distances | |
182 | + /// if we want to compute k nearest neighbor, we can input the last resul | |
183 | + /// (last_best_dist < dist < best_dist) to select the next point until reaching to k | |
184 | + if (d < best_distance) { | |
185 | + best_distance = d; | |
186 | + best_index = idx; // record the nearest neighbor index | |
187 | + } | |
188 | + } | |
189 | + break; // find the target point then break the loop | |
190 | + } | |
191 | + else if (query.dim[split_axis] < cur->split_value) { // if it has son node, visit the next node on either left side or right side | |
192 | + cur = cur->left; | |
193 | + } | |
194 | + else { | |
195 | + cur = cur->right; | |
196 | + } | |
197 | + } | |
198 | + *index = best_index; | |
199 | + *distance = best_distance; | |
200 | + } | |
201 | + void cpu_search_at_node_range(kdtree::kdnode<T> *cur, const kdtree::point<T, D> &query, T range, size_t *index, T *distance) { | |
202 | + T best_distance = FLT_MAX; // initialize the best distance to max of floating point | |
203 | + size_t best_index = 0; | |
204 | + std::vector < typename kdtree::point<T, D> > pts = cpu_tmp_points; | |
205 | + std::vector < typename kdtree::kdnode<T>*> next_node; | |
206 | + next_node.push_back(cur); | |
207 | + while (next_node.size()) { | |
208 | + std::vector<typename kdtree::kdnode<T>*> next_search; | |
209 | + while (next_node.size()) { | |
210 | + cur = next_node.back(); | |
211 | + next_node.pop_back(); | |
212 | + size_t split_axis = cur->level % D; | |
213 | + if (cur->left == NULL) { | |
214 | + for (size_t i = 0; i < cur->indices.size(); i++) { | |
215 | + size_t idx = cur->indices[i]; | |
216 | + T d = cpu_distance(query, pts[idx]); | |
217 | + if (d < best_distance) { | |
218 | + best_distance = d; | |
219 | + best_index = idx; | |
220 | + } | |
221 | + } | |
222 | + } | |
223 | + else { | |
224 | + T d = query.dim[split_axis] - cur->split_value; // computer distance along specific axis or dimension | |
225 | + /// there are three possibilities: on either left or right, and on both left and right | |
226 | + if (fabs(d) > range) { // absolute value of floating point to see if distance will be larger that best_dist | |
227 | + if (d < 0) | |
228 | + next_search.push_back(cur->left); // every left[split_axis] is less and equal to cur->split_value, so it is possible to find the nearest point in this region | |
229 | + else | |
230 | + next_search.push_back(cur->right); | |
231 | + } | |
232 | + else { // it is possible that nereast neighbor will appear on both left and right | |
233 | + next_search.push_back(cur->left); | |
234 | + next_search.push_back(cur->right); | |
235 | + } | |
236 | + } | |
237 | + } | |
238 | + next_node = next_search; // pop out at least one time | |
239 | + } | |
240 | + *index = best_index; | |
241 | + *distance = best_distance; | |
242 | + } | |
243 | + void cpu_search(T *h_query_points, size_t query_count, size_t *h_indices, T *h_distances) { | |
244 | + /// first convert the input query point into specific type | |
245 | + kdtree::point<T, D> query; | |
246 | + for (size_t j = 0; j < query_count; j++) { | |
247 | + for (size_t i = 0; i < D; i++) | |
248 | + query.dim[i] = h_query_points[j * D + i]; | |
249 | + /// find the nearest node, this will be the upper bound for the next time searching | |
250 | + kdtree::kdnode<T> *best_node = NULL; | |
251 | + T best_distance = FLT_MAX; | |
252 | + size_t best_index = 0; | |
253 | + T radius = 0; // radius for range | |
254 | + cpu_search_at_node(root, query, &best_index, &best_distance, &best_node); // simple search to rougly determine a result for next search step | |
255 | + radius = sqrt(best_distance); // It is possible that nearest will appear in another region | |
256 | + /// find other possibilities | |
257 | + kdtree::kdnode<T> *cur = best_node; | |
258 | + while (cur->parent != NULL) { // every node that you pass will be possible to be the best node | |
259 | + /// go up | |
260 | + kdtree::kdnode<T> *parent = cur->parent; // travel back to every node that we pass through | |
261 | + size_t split_axis = (parent->level) % D; | |
262 | + /// search other nodes | |
263 | + size_t tmp_index; | |
264 | + T tmp_distance = FLT_MAX; | |
265 | + if (fabs(parent->split_value - query.dim[split_axis]) <= radius) { | |
266 | + /// search opposite node | |
267 | + if (parent->left != cur) | |
268 | + cpu_search_at_node_range(parent->left, query, radius, &tmp_index, &tmp_distance); // to see whether it is its mother node's left son node | |
269 | + else | |
270 | + cpu_search_at_node_range(parent->right, query, radius, &tmp_index, &tmp_distance); | |
271 | + } | |
272 | + if (tmp_distance < best_distance) { | |
273 | + best_distance = tmp_distance; | |
274 | + best_index = tmp_index; | |
275 | + } | |
276 | + cur = parent; | |
277 | + } | |
278 | + h_indices[j] = best_index; | |
279 | + h_distances[j] = best_distance; | |
280 | + } | |
281 | + } | |
282 | + }; //end class kdtree | |
283 | + | |
284 | + template <typename T, int D> | |
285 | + cpu_kdtree<T, D>* cpu_kdtree<T, D>::cur_tree_ptr = NULL; // definition of cur_tree_ptr pointer points to the current class | |
286 | + | |
/// Flattened tree node that refers to other nodes by index rather than by pointer.
/// @tparam T is the type of the splitting value
template <typename T>
struct cuda_kdnode {
	int parent;				// index of the parent node
	int left;				// index of the left child
	int right;				// index of the right child
	T split_value;			// value that separates the left and right children
	size_t num_index;		// number of indices it has
	int index;				// the beginning index
	size_t level;			// tree level of the node
};
295 | + | |
296 | + template <typename T, int D> | |
297 | + __device__ T gpu_distance(kdtree::point<T, D> &a, kdtree::point<T, D> &b) { | |
298 | + T distance = 0; | |
299 | + | |
300 | + for (size_t i = 0; i < D; i++) { | |
301 | + T d = a.dim[i] - b.dim[i]; | |
302 | + distance += d*d; | |
303 | + } | |
304 | + return distance; | |
305 | + } | |
306 | + template <typename T, int D> | |
307 | + __device__ void search_at_node(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, int cur, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, int *d_node) { | |
308 | + T best_distance = FLT_MAX; | |
309 | + size_t best_index = 0; | |
310 | + | |
311 | + while (true) { // break until reach the bottom | |
312 | + int split_axis = nodes[cur].level % D; | |
313 | + if (nodes[cur].left == -1) { // check whether it has left node or not | |
314 | + *d_node = cur; | |
315 | + for (int i = 0; i < nodes[cur].num_index; i++) { | |
316 | + size_t idx = indices[nodes[cur].index + i]; | |
317 | + T dist = gpu_distance<T, D>(d_query_point, d_reference_points[idx]); | |
318 | + if (dist < best_distance) { | |
319 | + best_distance = dist; | |
320 | + best_index = idx; | |
321 | + } | |
322 | + } | |
323 | + break; | |
324 | + } | |
325 | + else if (d_query_point.dim[split_axis] < nodes[cur].split_value) { // jump into specific son node | |
326 | + cur = nodes[cur].left; | |
327 | + } | |
328 | + else { | |
329 | + cur = nodes[cur].right; | |
330 | + } | |
331 | + } | |
332 | + *d_distance = best_distance; | |
333 | + *d_index = best_index; | |
334 | + } | |
335 | + template <typename T, int D> | |
336 | + __device__ void search_at_node_range(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, int cur, T range, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | |
337 | + T best_distance = FLT_MAX; | |
338 | + size_t best_index = 0; | |
339 | + | |
340 | + int next_nodes_pos = 0; // initialize pop out order index | |
341 | + next_nodes[id * stack_size + next_nodes_pos] = cur; // find data that belongs to the very specific thread | |
342 | + next_nodes_pos++; | |
343 | + | |
344 | + while (next_nodes_pos) { | |
345 | + int next_search_nodes_pos = 0; // record push back order index | |
346 | + while (next_nodes_pos) { | |
347 | + cur = next_nodes[id * stack_size + next_nodes_pos - 1]; // pop out the last push in one and keep poping out | |
348 | + next_nodes_pos--; | |
349 | + int split_axis = nodes[cur].level % D; | |
350 | + | |
351 | + if (nodes[cur].left == -1) { | |
352 | + for (int i = 0; i < nodes[cur].num_index; i++) { | |
353 | + int idx = indices[nodes[cur].index + i]; // all indices are stored in one array, pick up from every node's beginning index | |
354 | + T d = gpu_distance<T>(d_query_point, d_reference_points[idx]); | |
355 | + if (d < best_distance) { | |
356 | + best_distance = d; | |
357 | + best_index = idx; | |
358 | + } | |
359 | + } | |
360 | + } | |
361 | + else { | |
362 | + T d = d_query_point.dim[split_axis] - nodes[cur].split_value; | |
363 | + | |
364 | + if (fabs(d) > range) { | |
365 | + if (d < 0) { | |
366 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left; | |
367 | + next_search_nodes_pos++; | |
368 | + } | |
369 | + else { | |
370 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right; | |
371 | + next_search_nodes_pos++; | |
372 | + } | |
373 | + } | |
374 | + else { | |
375 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].right; | |
376 | + next_search_nodes_pos++; | |
377 | + next_search_nodes[id * stack_size + next_search_nodes_pos] = nodes[cur].left; | |
378 | + next_search_nodes_pos++; | |
379 | + if (next_search_nodes_pos > stack_size) { | |
380 | + printf("Thread conflict might be caused by thread %d, so please try smaller input max_tree_levels\n", id); | |
381 | + (*Judge)++; | |
382 | + } | |
383 | + } | |
384 | + } | |
385 | + } | |
386 | + for (int i = 0; i < next_search_nodes_pos; i++) | |
387 | + next_nodes[id * stack_size + i] = next_search_nodes[id * stack_size + i]; | |
388 | + next_nodes_pos = next_search_nodes_pos; | |
389 | + } | |
390 | + *d_distance = best_distance; | |
391 | + *d_index = best_index; | |
392 | + } | |
393 | + template <typename T, int D> | |
394 | + __device__ void search(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> &d_query_point, size_t *d_index, T *d_distance, size_t id, int *next_nodes, int *next_search_nodes, int *Judge) { | |
395 | + int best_node = 0; | |
396 | + T best_distance = FLT_MAX; | |
397 | + size_t best_index = 0; | |
398 | + T radius = 0; | |
399 | + | |
400 | + search_at_node<T, D>(nodes, indices, d_reference_points, 0, d_query_point, &best_index, &best_distance, &best_node); | |
401 | + radius = sqrt(best_distance); // get range | |
402 | + int cur = best_node; | |
403 | + | |
404 | + while (nodes[cur].parent != -1) { | |
405 | + int parent = nodes[cur].parent; | |
406 | + int split_axis = nodes[parent].level % D; | |
407 | + | |
408 | + T tmp_dist = FLT_MAX; | |
409 | + size_t tmp_idx; | |
410 | + if (fabs(nodes[parent].split_value - d_query_point.dim[split_axis]) <= radius) { | |
411 | + if (nodes[parent].left != cur) | |
412 | + search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].left, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge); | |
413 | + else | |
414 | + search_at_node_range(nodes, indices, d_reference_points, d_query_point, nodes[parent].right, radius, &tmp_idx, &tmp_dist, id, next_nodes, next_search_nodes, Judge); | |
415 | + } | |
416 | + if (tmp_dist < best_distance) { | |
417 | + best_distance = tmp_dist; | |
418 | + best_index = tmp_idx; | |
419 | + } | |
420 | + cur = parent; | |
421 | + } | |
422 | + *d_distance = sqrt(best_distance); | |
423 | + *d_index = best_index; | |
424 | + } | |
425 | + template <typename T, int D> | |
426 | + __global__ void search_batch(cuda_kdnode<T> *nodes, size_t *indices, kdtree::point<T, D> *d_reference_points, kdtree::point<T, D> *d_query_points, size_t d_query_count, size_t *d_indices, T *d_distances, int *next_nodes, int *next_search_nodes, int *Judge) { | |
427 | + size_t idx = blockIdx.x * blockDim.x + threadIdx.x; | |
428 | + if (idx >= d_query_count) return; // avoid segfault | |
429 | + | |
430 | + search<T, D>(nodes, indices, d_reference_points, d_query_points[idx], &d_indices[idx], &d_distances[idx], idx, next_nodes, next_search_nodes, Judge); // every query points are independent | |
431 | + } | |
432 | + | |
433 | + template <typename T, int D = 3> | |
434 | + class cuda_kdtree { | |
435 | + protected: | |
436 | + cuda_kdnode<T> *d_nodes; | |
437 | + size_t *d_index; | |
438 | + kdtree::point<T, D>* d_reference_points; | |
439 | + size_t npts; | |
440 | + int num_nodes; | |
441 | + public: | |
442 | + ~cuda_kdtree() { | |
443 | + HANDLE_ERROR(cudaFree(d_nodes)); | |
444 | + HANDLE_ERROR(cudaFree(d_index)); | |
445 | + HANDLE_ERROR(cudaFree(d_reference_points)); | |
446 | + } | |
447 | + | |
448 | + /// Create a KD-tree given a pointer to an array of reference points and the number of reference points | |
449 | + /// @param h_reference_points is a host array containing the reference points in (x0, y0, z0, ...., ) order | |
450 | + /// @param reference_count is the number of reference point in the array | |
451 | + /// @param max_levels is the deepest number of tree levels allowed | |
452 | + void create(T *h_reference_points, size_t reference_count, size_t max_levels = 3) { | |
453 | + if (max_levels > 10) { | |
454 | + std::cout<<"The max_tree_levels should be smaller!"<<std::endl; | |
455 | + exit(1); | |
456 | + } | |
457 | + //bb.init(&h_reference_points[0]); | |
458 | + //aaboundingboxing<T, D>(bb, h_reference_points, reference_count); | |
459 | + | |
460 | + std::vector < typename kdtree::point<T, D> > reference_points(reference_count); // restore the reference points in particular way | |
461 | + for (size_t j = 0; j < reference_count; j++) | |
462 | + for (size_t i = 0; i < D; i++) | |
463 | + reference_points[j].dim[i] = h_reference_points[j * D + i]; | |
464 | + cpu_kdtree<T, D> tree; // creating a tree on cpu | |
465 | + tree.cpu_create(reference_points, max_levels); // building a tree on cpu | |
466 | + kdtree::kdnode<T> *d_root = tree.get_root(); | |
467 | + num_nodes = tree.get_num_nodes(); | |
468 | + npts = reference_count; // also equals to reference_count | |
469 | + | |
470 | + HANDLE_ERROR(cudaMalloc((void**)&d_nodes, sizeof(cuda_kdnode<T>) * num_nodes)); // copy data from host to device | |
471 | + HANDLE_ERROR(cudaMalloc((void**)&d_index, sizeof(size_t) * npts)); | |
472 | + HANDLE_ERROR(cudaMalloc((void**)&d_reference_points, sizeof(kdtree::point<T, D>) * npts)); | |
473 | + | |
474 | + std::vector < cuda_kdnode<T> > tmp_nodes(num_nodes); | |
475 | + std::vector <size_t> indices(npts); | |
476 | + std::vector <kdtree::kdnode<T>*> next_nodes; | |
477 | + size_t cur_pos = 0; | |
478 | + next_nodes.push_back(d_root); | |
479 | + while (next_nodes.size()) { | |
480 | + std::vector <typename kdtree::kdnode<T>*> next_search_nodes; | |
481 | + while (next_nodes.size()) { | |
482 | + kdtree::kdnode<T> *cur = next_nodes.back(); | |
483 | + next_nodes.pop_back(); | |
484 | + int id = cur->idx; // the nodes at same level are independent | |
485 | + tmp_nodes[id].level = cur->level; | |
486 | + tmp_nodes[id].parent = cur->parent_idx; | |
487 | + tmp_nodes[id].left = cur->left_idx; | |
488 | + tmp_nodes[id].right = cur->right_idx; | |
489 | + tmp_nodes[id].split_value = cur->split_value; | |
490 | + tmp_nodes[id].num_index = cur->indices.size(); // number of index | |
491 | + if (cur->indices.size()) { | |
492 | + for (size_t i = 0; i < cur->indices.size(); i++) | |
493 | + indices[cur_pos + i] = cur->indices[i]; | |
494 | + | |
495 | + tmp_nodes[id].index = (int)cur_pos; // beginning index of reference_points that every bottom node has | |
496 | + cur_pos += cur->indices.size(); // store indices continuously for every query_point | |
497 | + } | |
498 | + else { | |
499 | + tmp_nodes[id].index = -1; | |
500 | + } | |
501 | + | |
502 | + if (cur->left) | |
503 | + next_search_nodes.push_back(cur->left); | |
504 | + | |
505 | + if (cur->right) | |
506 | + next_search_nodes.push_back(cur->right); | |
507 | + } | |
508 | + next_nodes = next_search_nodes; | |
509 | + } | |
510 | + HANDLE_ERROR(cudaMemcpy(d_nodes, &tmp_nodes[0], sizeof(cuda_kdnode<T>) * tmp_nodes.size(), cudaMemcpyHostToDevice)); | |
511 | + HANDLE_ERROR(cudaMemcpy(d_index, &indices[0], sizeof(size_t) * indices.size(), cudaMemcpyHostToDevice)); | |
512 | + HANDLE_ERROR(cudaMemcpy(d_reference_points, &reference_points[0], sizeof(kdtree::point<T, D>) * reference_points.size(), cudaMemcpyHostToDevice)); | |
513 | + } | |
514 | + | |
515 | + /// Search the KD tree for nearest neighbors to a set of specified query points | |
516 | + /// @param h_query_points an array of query points in (x0, y0, z0, ...) order | |
517 | + /// @param query_count is the number of query points | |
518 | + /// @param indices are the indices to the nearest reference point for each query points | |
519 | + /// @param distances is an array containing the distance between each query point and the nearest reference point | |
520 | + void search(T *h_query_points, size_t query_count, size_t *indices, T *distances) { | |
521 | + std::vector < typename kdtree::point<T, D> > query_points(query_count); | |
522 | + for (size_t j = 0; j < query_count; j++) | |
523 | + for (size_t i = 0; i < D; i++) | |
524 | + query_points[j].dim[i] = h_query_points[j * D + i]; | |
525 | + | |
526 | + unsigned int threads = (unsigned int)(query_points.size() > 1024 ? 1024 : query_points.size()); | |
527 | + unsigned int blocks = (unsigned int)(query_points.size() / threads + (query_points.size() % threads ? 1 : 0)); | |
528 | + | |
529 | + kdtree::point<T, D> *d_query_points; // create a pointer pointing to query points on gpu | |
530 | + size_t *d_indices; | |
531 | + T *d_distances; | |
532 | + | |
533 | + int *next_nodes; // create two STACK-like array | |
534 | + int *next_search_nodes; | |
535 | + | |
536 | + int *Judge = NULL; // judge variable to see whether one thread is overwrite another thread's memory | |
537 | + | |
538 | + HANDLE_ERROR(cudaMalloc((void**)&d_query_points, sizeof(T) * query_points.size() * D)); | |
539 | + HANDLE_ERROR(cudaMalloc((void**)&d_indices, sizeof(size_t) * query_points.size())); | |
540 | + HANDLE_ERROR(cudaMalloc((void**)&d_distances, sizeof(T) * query_points.size())); | |
541 | + HANDLE_ERROR(cudaMalloc((void**)&next_nodes, threads * blocks * stack_size * sizeof(int))); // STACK size right now is 50, you can change it if you mean to | |
542 | + HANDLE_ERROR(cudaMalloc((void**)&next_search_nodes, threads * blocks * stack_size * sizeof(int))); | |
543 | + HANDLE_ERROR(cudaMemcpy(d_query_points, &query_points[0], sizeof(T) * query_points.size() * D, cudaMemcpyHostToDevice)); | |
544 | + | |
545 | + search_batch<<<blocks, threads>>> (d_nodes, d_index, d_reference_points, d_query_points, query_points.size(), d_indices, d_distances, next_nodes, next_search_nodes, Judge); | |
546 | + | |
547 | + if (Judge == NULL) { // do the following work if the thread works safely | |
548 | + HANDLE_ERROR(cudaMemcpy(indices, d_indices, sizeof(size_t) * query_points.size(), cudaMemcpyDeviceToHost)); | |
549 | + HANDLE_ERROR(cudaMemcpy(distances, d_distances, sizeof(T) * query_points.size(), cudaMemcpyDeviceToHost)); | |
550 | + } | |
551 | + | |
552 | + HANDLE_ERROR(cudaFree(next_nodes)); | |
553 | + HANDLE_ERROR(cudaFree(next_search_nodes)); | |
554 | + HANDLE_ERROR(cudaFree(d_query_points)); | |
555 | + HANDLE_ERROR(cudaFree(d_indices)); | |
556 | + HANDLE_ERROR(cudaFree(d_distances)); | |
557 | + } | |
558 | + | |
559 | + /// Return the number of points in the KD tree | |
560 | + size_t num_points() { | |
561 | + return npts; | |
562 | + } | |
563 | + | |
564 | + stim::aabbn<T, D> getbox() { | |
565 | + size_t N = npts; | |
566 | + //std::vector < typename kdtree::point<T, D> > cpu_ref(npts); //allocate space on the CPU for the reference points | |
567 | + T* cpu_ref = (T*)malloc(N * D * sizeof(T)); //allocate space on the CPU for the reference points | |
568 | + HANDLE_ERROR(cudaMemcpy(cpu_ref, d_reference_points, N * D * sizeof(T), cudaMemcpyDeviceToHost)); //copy from GPU to CPU | |
569 | + | |
570 | + stim::aabbn<T, D> bb(cpu_ref); | |
571 | + | |
572 | + for (size_t i = 1; i < N; i++) { //for each reference point | |
573 | + //std::cout << "( " << cpu_ref[i * D + 0] << ", " << cpu_ref[i * D + 1] << ", " << cpu_ref[i * D + 2] << ")" << std::endl; | |
574 | + bb.insert(&cpu_ref[i * D]); | |
575 | + } | |
576 | + return bb; | |
577 | + } | |
578 | + | |
579 | + //generate an implicit distance field for the KD-tree | |
580 | + void dist_field3(T* dist, size_t* dims, stim::aabbn<T, 3> bb) { | |
581 | + size_t N = 1; //number of query points that make up the distance field | |
582 | + for (size_t d = 0; d < 3; d++) N *= dims[d]; //calculate the total number of query points | |
583 | + | |
584 | + //calculate the grid spatial parameters | |
585 | + T dx = 0; | |
586 | + if (dims[0] > 1) dx = bb.length(0) / dims[0]; | |
587 | + T dy = 0; | |
588 | + if (dims[1] > 1) dy = bb.length(1) / dims[1]; | |
589 | + T dz = 0; | |
590 | + if (dims[2] > 1) dz = bb.length(2) / dims[2]; | |
591 | + | |
592 | + T* Q = (T*)malloc(N * 3 * sizeof(T)); //allocate space for the query points | |
593 | + size_t i; | |
594 | + for (size_t z = 0; z < dims[2]; z++) { //for each query point (which is a point in the grid) | |
595 | + for (size_t y = 0; y < dims[1]; y++) { | |
596 | + for (size_t x = 0; x < dims[0]; x++) { | |
597 | + i = z * dims[1] * dims[0] + y * dims[0] + x; | |
598 | + Q[i * 3 + 0] = bb.low[0] + x * dx + dx / 2; | |
599 | + Q[i * 3 + 1] = bb.low[1] + y * dy + dy / 2; | |
600 | + Q[i * 3 + 2] = bb.low[2] + z * dz + dz / 2; | |
601 | + //std::cout << i<<" "<<Q[i * 3 + 0] << " " << Q[i * 3 + 1] << " " << Q[i * 3 + 2] << std::endl; | |
602 | + } | |
603 | + } | |
604 | + } | |
605 | + size_t* temp = (size_t*)malloc(N * sizeof(size_t)); //allocate space to store the indices (unused) | |
606 | + search(Q, N, temp, dist); | |
607 | + } | |
608 | + | |
609 | + //generate an implicit distance field for the KD-tree | |
610 | + void dist_field3(T* dist, size_t* dims) { | |
611 | + stim::aabbn<T, D> bb = getbox(); //get a bounding box around the tree | |
612 | + dist_field3(dist, dims, bb); | |
613 | + } | |
614 | + | |
615 | + }; | |
616 | +} //end namespace stim | |
617 | +#endif | |
0 | 618 | \ No newline at end of file | ... | ... |
1 | +#ifndef STIM_UTIL_FILESIZE_H | |
2 | +#define STIM_UTIL_FILESIZE_H | |
3 | + | |
4 | +#ifdef _WIN32 | |
5 | +#include <Windows.h> | |
6 | +#else | |
7 | +#include <sys/types.h> | |
8 | +#include <sys/stat.h> | |
9 | +#endif | |
10 | + | |
11 | +namespace stim{ | |
/// Return the size of a file in bytes.
///
/// @param filename	path of the file to query
/// @return			the file size in bytes, or 0 if the file cannot be opened or queried
static size_t file_size(std::string filename){
#ifdef _WIN32
	//use the ANSI entry point explicitly: the CreateFile macro maps to CreateFileW when
	//	UNICODE is defined, which does not accept the narrow string held by std::string
	HANDLE hFile = CreateFileA(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
	if(hFile == INVALID_HANDLE_VALUE) return 0;
	LARGE_INTEGER size;
	const BOOL ok = GetFileSizeEx(hFile, &size);
	CloseHandle(hFile);							//the handle is released whether or not the query succeeded
	if(!ok) return 0;
	return (size_t)size.QuadPart;
#else
	struct stat sb;
	if(stat(filename.c_str(), &sb) != 0)		//on failure sb is not filled in, so report 0 like the Windows branch
		return 0;
	return (size_t)sb.st_size;
#endif
}
29 | + | |
30 | +} //end namespace stim | |
31 | + | |
32 | + | |
33 | + | |
34 | +#endif | ... | ... |
stim/visualization/aabb3.h
... | ... | @@ -2,51 +2,31 @@ |
2 | 2 | #define STIM_AABB3_H |
3 | 3 | |
4 | 4 | #include <stim/cuda/cudatools/callable.h> |
5 | +#include <stim/visualization/aabbn.h> | |
5 | 6 | |
6 | 7 | namespace stim{ |
7 | 8 | |
8 | -/// Structure for a 3D axis aligned bounding box | |
9 | + template<typename T> | |
10 | + using aabb3 = aabbn<T, 3>; | |
11 | +/*/// Structure for a 3D axis aligned bounding box | |
9 | 12 | template<typename T> |
10 | -struct aabb3{ | |
11 | - | |
12 | -//protected: | |
13 | - | |
14 | - T low[3]; //top left corner position | |
15 | - T high[3]; //dimensions along x and y and z | |
16 | - | |
17 | -//public: | |
18 | - | |
19 | - CUDA_CALLABLE aabb3(T x, T y, T z){ //initialize an axis aligned bounding box of size 0 at the given position | |
20 | - low[0] = high[0] = x; //set the position to the user specified coordinates | |
21 | - low[1] = high[1] = y; | |
22 | - low[2] = high[2] = z; | |
13 | +struct aabb3 : public aabbn<T, 3>{ | |
14 | + | |
15 | + aabb3() : aabbn() {} | |
16 | + aabb3(T x0, T y0, T z0, T x1, T y1, T z1){ | |
17 | + low[0] = x0; | |
18 | + low[1] = y0; | |
19 | + low[2] = z0; | |
20 | + high[0] = x0; | |
21 | + high[1] = x1; | |
22 | + high[2] = x2; | |
23 | 23 | } |
24 | 24 | |
25 | - //insert a point into the bounding box, growing the box appropriately | |
26 | - CUDA_CALLABLE void insert(T x, T y, T z){ | |
27 | - if(x < low[0]) low[0] = x; | |
28 | - if(y < low[1]) low[1] = y; | |
29 | - if(z < low[2]) low[2] = z; | |
30 | - | |
31 | - if(x > high[0]) high[0] = x; | |
32 | - if(y > high[1]) high[1] = y; | |
33 | - if(z > high[2]) high[2] = z; | |
34 | - } | |
35 | - | |
36 | - //trim the bounding box so that the lower bounds are (x, y, z) | |
37 | - CUDA_CALLABLE void trim_low(T x, T y, T z){ | |
38 | - if(low[0] < x) low[0] = x; | |
39 | - if(low[1] < y) low[1] = y; | |
40 | - if(low[2] < z) low[2] = z; | |
41 | - } | |
25 | + aabb3 aabbn<T, 3>() { | |
42 | 26 | |
43 | - CUDA_CALLABLE void trim_high(T x, T y, T z){ | |
44 | - if(high[0] > x) high[0] = x; | |
45 | - if(high[1] > y) high[1] = y; | |
46 | - if(high[2] > z) high[2] = z; | |
47 | 27 | } |
48 | 28 | |
49 | -}; | |
29 | +};*/ | |
50 | 30 | |
51 | 31 | } |
52 | 32 | ... | ... |
1 | +#ifndef STIM_AABBN_H | |
2 | +#define STIM_AABBN_H | |
3 | + | |
4 | +#include <vector> | |
5 | +#include <stim/cuda/cudatools/callable.h> | |
6 | + | |
7 | +namespace stim{ | |
8 | + | |
/// Structure for a D-dimensional axis-aligned bounding box.
///
/// The box is stored as two corner points: low[d] is the smallest coordinate and
/// high[d] the largest coordinate covered along axis d.
template<typename T, size_t D>
struct aabbn{

//protected:

	T low[D];				//lower bound of the box along each axis
	T high[D];				//upper bound of the box along each axis

	/// Collapse the box to a single point
	/// @param i	array of D coordinates
	CUDA_CALLABLE void init(const T* i) {
		for (size_t d = 0; d < D; d++)
			low[d] = high[d] = i[d];
	}

	/// Default constructor: the bounds are left uninitialized
	CUDA_CALLABLE aabbn() {}

	/// Create a zero-size box located at the point i (an array of D coordinates)
	CUDA_CALLABLE aabbn(const T* i) {
		init(i);
	}

	/// Set the bounds along the first axis only; any other axes are left uninitialized
	CUDA_CALLABLE aabbn(T x0, T x1) {
		low[0] = x0;
		high[0] = x1;
	}

	/// Set the bounds along the first two axes; any other axes are left uninitialized
	CUDA_CALLABLE aabbn(T x0, T y0, T x1, T y1) : aabbn(x0, x1) {
		static_assert(D >= 2, "aabbn: this constructor requires at least 2 dimensions");
		low[1] = y0;
		high[1] = y1;
	}

	/// Set the bounds along the first three axes; any other axes are left uninitialized
	CUDA_CALLABLE aabbn(T x0, T y0, T z0, T x1, T y1, T z1) : aabbn(x0, y0, x1, y1) {
		static_assert(D >= 3, "aabbn: this constructor requires at least 3 dimensions");
		low[2] = z0;
		high[2] = z1;
	}


	/// Insert a point into the bounding box, growing the box appropriately
	/// @param p	array of D coordinates
	CUDA_CALLABLE void insert(const T* p){
		for(size_t d = 0; d < D; d++){
			if(p[d] < low[d]) low[d] = p[d];
			if(p[d] > high[d]) high[d] = p[d];
		}
	}

	/// Trim the bounding box so that no lower bound is smaller than b(x, y, z, ...)
	CUDA_CALLABLE void trim_low(const T* b){
		for(size_t d = 0; d < D; d++)
			if(low[d] < b[d]) low[d] = b[d];
	}

	/// Trim the bounding box so that no upper bound is larger than b(x, y, z, ...)
	CUDA_CALLABLE void trim_high(const T* b){
		for(size_t d = 0; d < D; d++)
			if(high[d] > b[d]) high[d] = b[d];		//clamp the upper bound (the lower bound is handled by trim_low)
	}

	/// Return the extent of the box along dimension d
	CUDA_CALLABLE T length(size_t d) const {
		return high[d] - low[d];
	}

	/// Return a copy of the box scaled by s about its center
	CUDA_CALLABLE aabbn<T, D> operator*(T s) const {
		aabbn<T, D> newbox;
		for (size_t d = 0; d < D; d++) {
			T c = (low[d] + high[d]) / 2;			//center of the box along this axis
			T l = high[d] - low[d];					//current extent along this axis
			newbox.low[d] = c - l * s / 2;
			newbox.high[d] = c + l * s / 2;
		}
		return newbox;
	}

	/// Translate the box along dimension d a distance of v (the other dimensions are unchanged)
	CUDA_CALLABLE void translate(size_t d, T v) {
		low[d] += v;
		high[d] += v;
	}

};
87 | + | |
88 | +} | |
89 | + | |
90 | + | |
91 | +#endif | |
0 | 92 | \ No newline at end of file | ... | ... |
stim/visualization/cylinder.h
... | ... | @@ -4,6 +4,9 @@ |
4 | 4 | #include <stim/math/circle.h> |
5 | 5 | #include <stim/biomodels/centerline.h> |
6 | 6 | |
7 | +/* | |
8 | + | |
9 | +*/ | |
7 | 10 | |
8 | 11 | namespace stim |
9 | 12 | { |
... | ... | @@ -12,13 +15,13 @@ class cylinder |
12 | 15 | : public centerline<T> |
13 | 16 | { |
14 | 17 | private: |
15 | - stim::circle<T> s; //an arbitrary circle | |
16 | - std::vector<stim::circle<T> > e; //an array of circles that store the centerline | |
18 | + stim::circle<T> s; //an arbitrary circle | |
19 | + std::vector<stim::circle<T> > e; //an array of circles that store the centerline | |
17 | 20 | |
18 | 21 | std::vector<stim::vec3<T> > norms; |
19 | 22 | std::vector<stim::vec<T> > Us; |
20 | - std::vector<stim::vec<T> > mags; | |
21 | - std::vector< T > L; //length of the cylinder at each position. | |
23 | + std::vector<stim::vec<T> > mags; //stores a list of magnitudes for each point in the centerline (assuming mags[0] is the radius) | |
24 | + std::vector< T > L; //length of the cylinder at each position (pre-integration) | |
22 | 25 | |
23 | 26 | |
24 | 27 | using stim::centerline<T>::c; |
... | ... | @@ -61,9 +64,9 @@ class cylinder |
61 | 64 | return; |
62 | 65 | |
63 | 66 | //calculate each L. |
64 | - L.resize(inP.size()); | |
65 | - T temp = (T)0; | |
66 | - L[0] = 0; | |
67 | + L.resize(inP.size()); //the number of precomputed lengths will equal the number of points | |
68 | + T temp = (T)0; //length up to that point | |
69 | + L[0] = temp; | |
67 | 70 | for(size_t i = 1; i < L.size(); i++) |
68 | 71 | { |
69 | 72 | temp += (inP[i-1] - inP[i]).len(); |
... | ... | @@ -234,7 +237,7 @@ class cylinder |
234 | 237 | cylinder(std::vector< stim::vec3<T> > inP) |
235 | 238 | : centerline<T>(inP) |
236 | 239 | { |
237 | - std::vector< T > inM; //create an array of arbitrary magnitudes | |
240 | + std::vector< stim::vec<T> > inM; //create an array of arbitrary magnitudes | |
238 | 241 | |
239 | 242 | stim::vec<T> zero; |
240 | 243 | zero.push_back(0); |
... | ... | @@ -476,30 +479,30 @@ class cylinder |
476 | 479 | |
477 | 480 | std::vector< vec3<T> > result; |
478 | 481 | |
479 | - vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline | |
482 | + vec3<T> p0 = e[0].P; //initialize p0 to the first point on the centerline | |
480 | 483 | vec3<T> p1; |
481 | - unsigned N = size(); //number of points in the current centerline | |
484 | + unsigned N = size(); //number of points in the current centerline | |
482 | 485 | |
483 | 486 | //for each line segment on the centerline |
484 | 487 | for(unsigned int i = 1; i < N; i++){ |
485 | - p1 = e[i].P; //get the second point in the line segment | |
488 | + p1 = e[i].P; //get the second point in the line segment | |
486 | 489 | |
487 | - vec3<T> v = p1 - p0; //calculate the vector between these two points | |
488 | - T d = v.len(); //calculate the distance between these two points (length of the line segment) | |
490 | + vec3<T> v = p1 - p0; //calculate the vector between these two points | |
491 | + T d = v.len(); //calculate the distance between these two points (length of the line segment) | |
489 | 492 | |
490 | 493 | size_t nsteps = (size_t)std::ceil(d / spacing); //calculate the number of steps to take along the segment to meet the spacing criteria |
491 | - T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points | |
494 | + T stepsize = (T)1.0 / nsteps; //calculate the parametric step size between new centerline points | |
492 | 495 | |
493 | 496 | //for each step along the line segment |
494 | 497 | for(unsigned s = 0; s < nsteps; s++){ |
495 | - T alpha = stepsize * s; //calculate the fraction of the distance along the line segment covered | |
496 | - result.push_back(p0 + alpha * v); //push the point at alpha position along the line segment | |
498 | + T alpha = stepsize * s; //calculate the fraction of the distance along the line segment covered | |
499 | + result.push_back(p0 + alpha * v); //push the point at alpha position along the line segment | |
497 | 500 | } |
498 | 501 | |
499 | - p0 = p1; //shift the points to move to the next line segment | |
502 | + p0 = p1; //shift the points to move to the next line segment | |
500 | 503 | } |
501 | 504 | |
502 | - result.push_back(e[size() - 1].P); //push the last point in the centerline | |
505 | + result.push_back(e[size() - 1].P); //push the last point in the centerline | |
503 | 506 | |
504 | 507 | return cylinder<T>(result); |
505 | 508 | ... | ... |