Commit 0d84034253937b68843ec545a0050dd6abaccf5c
0 parents
public release commit
Showing
11 changed files
with
2164 additions
and
0 deletions
Show diff stats
1 | +++ a/CMakeLists.txt | |
# Specify the minimum CMake version as well as the project (and its languages)
cmake_minimum_required(VERSION 2.8)

# Project name; the executable target created further down uses the same name
project(ga-gpu)

# Make the Find*.cmake modules stored in the source root visible to find_package()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}")

# Default to a Release build when no build type was selected
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# On MS Visual Studio, build the executable into a single directory for both
# the Debug and Release configurations. OUTPUT_DIRECTORY may be supplied by the
# user (e.g. -DOUTPUT_DIRECTORY=...); when it is not, fall back to the binary
# directory so the two settings below are never set to an empty value.
if(MSVC)
  if(NOT OUTPUT_DIRECTORY)
    set(OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
  endif()
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
endif()
# Compiler flags (marked "MAYBE REMOVE" by the original author) ----------------

# Compile host code as C++11 when the C++ compiler is GCC. The options are
# appended so that flags already present in CMAKE_CXX_FLAGS are preserved.
if(CMAKE_COMPILER_IS_GNUCXX)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_FORCE_INLINES")
endif()

# Generate code for compute capability 5.0. CUDA_NVCC_FLAGS is a
# semicolon-separated list with one nvcc argument per element, so each option
# is appended as its own list item instead of one space-joined string.
# NOTE(review): this runs before find_package(CUDA), which declares
# CUDA_NVCC_FLAGS as a cache entry; confirm the value survives the very first
# configure run, or move this after the find_package call.
list(APPEND CUDA_NVCC_FLAGS
  "--gpu-architecture=compute_50"
  "--gpu-code=sm_50,compute_50"
)
#-------------------------------------------------------------------------------
30 | + | |
31 | + | |
32 | + | |
# find packages ----------------------------------------------------------------

# OpenCV is mandatory; USING_OPENCV is defined for everything built in this directory
find_package(OpenCV REQUIRED)
add_definitions(-DUSING_OPENCV)

# Threading library (its link flags are consumed through CMAKE_THREAD_LIBS_INIT below)
find_package(Threads)

# X11 (not marked REQUIRED)
find_package(X11)

# STIM library (not marked REQUIRED); located by the FindSTIM.cmake module on CMAKE_MODULE_PATH
find_package(STIM)

# CUDA, mostly for linear algebra using cuBLAS
find_package(CUDA REQUIRED)

# Boost is used for Unix-based file lists; it is only searched for with GCC
if(CMAKE_COMPILER_IS_GNUCC)
  find_package(Boost COMPONENTS filesystem system)
  if(NOT Boost_FOUND)
    message(FATAL_ERROR "ga-gpu requires Boost::filesystem and Boost::system when using GCC")
  endif()
  include_directories(${Boost_INCLUDE_DIRS})
endif()

# FANN is currently disabled
#find_package(FANN REQUIRED)

# GLUT/GLEW visualization is currently disabled
#find_package(OpenGL REQUIRED)
#find_package(GLUT REQUIRED)
#if(WIN32)
#  find_package(GLEW REQUIRED)
#  include_directories(${GLEW_INCLUDE_DIR})
#endif(WIN32)

# LAPACKE and its supporting libraries; located by the FindLAPACKE.cmake module
find_package(LAPACKE REQUIRED)
# Header search paths shared by every target defined in this directory.
# OpenGL_INCLUDE_DIRS and FANN_INCLUDE_DIRS expand to nothing while the
# corresponding find_package() calls above stay commented out.
set(GA_GPU_SRC_DIR "${CMAKE_SOURCE_DIR}/src")
include_directories(
  ${CUDA_INCLUDE_DIRS}
  ${OpenCV_INCLUDE_DIRS}
  ${LAPACKE_INCLUDE_DIR}
  ${STIM_INCLUDE_DIRS}
  ${OpenGL_INCLUDE_DIRS}
  ${FANN_INCLUDE_DIRS}
  "${GA_GPU_SRC_DIR}"
)

# Collect the project headers (listed in the target so IDEs show them)
file(GLOB GAGPU_H "${GA_GPU_SRC_DIR}/*.h")

# Collect the sources; the *.c* pattern matches every extension starting with
# "c" in src/ (for example .c, .cpp and .cu)
file(GLOB GA_GPU_SRC "${GA_GPU_SRC_DIR}/*.c*")
94 | + | |
95 | + | |
# Build the ga-gpu executable from the globbed headers and sources
cuda_add_executable(ga-gpu ${GAGPU_H} ${GA_GPU_SRC})

# Libraries the executable is linked against. Variables that were not set by a
# find module simply expand to nothing.
# NOTE(review): the keyword-less signature is kept deliberately; FindCUDA's
# cuda_add_executable is believed to link with the plain signature as well, and
# CMake rejects mixing plain and PRIVATE/PUBLIC signatures on one target.
# Confirm before adding a keyword here.
target_link_libraries(ga-gpu
  ${CUDA_LIBRARIES}
  ${CUDA_CUBLAS_LIBRARIES}
  ${CUDA_CUFFT_LIBRARIES}
  ${LAPACKE_LIBRARIES}
  ${LAPACK_LIBRARIES}
  ${BLAS_LIBRARIES}
  ${CMAKE_THREAD_LIBS_INIT}
  ${X11_LIBRARIES}
  ${OpenCV_LIBS}
)
112 | + | |
113 | + | |
114 | +#create the PROC executable---------------------------------------------- | |
115 | + | |
116 | +#create an executable file | |
117 | +#add_executable(hsiga | |
118 | +# ${GACPU_H} | |
119 | +# ${GA_H} | |
120 | +# ${GA_CPU_SRC} | |
121 | +#) | |
122 | +#target_link_libraries(hsiga ${LAPACKE_LIBRARIES} | |
123 | +# ${LAPACK_LIBRARIES} | |
124 | +# ${BLAS_LIBRARIES} | |
125 | +# ${CMAKE_THREAD_LIBS_INIT} | |
126 | +# ${X11_LIBRARIES} | |
127 | +# ${OpenCV_LIBS} | |
128 | +#) | |
129 | + | |
130 | + | |
131 | + | |
# If Boost.Filesystem was found (it is only searched for when building with
# GCC), link the requested Boost components into the executable.
# Boost_LIBRARIES is the documented FindBoost result variable and holds the
# libraries of every requested component (filesystem and system here).
if(Boost_FILESYSTEM_FOUND)
  target_link_libraries(ga-gpu ${Boost_LIBRARIES})
endif()
1 | +++ a/FindGLEW.cmake | |
1 | +# Copyright (c) 2012-2016 DreamWorks Animation LLC | |
2 | +# | |
3 | +# All rights reserved. This software is distributed under the | |
4 | +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ ) | |
5 | +# | |
6 | +# Redistributions of source code must retain the above copyright | |
7 | +# and license notice and the following restrictions and disclaimer. | |
8 | +# | |
9 | +# * Neither the name of DreamWorks Animation nor the names of | |
10 | +# its contributors may be used to endorse or promote products derived | |
11 | +# from this software without specific prior written permission. | |
12 | +# | |
13 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
14 | +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
15 | +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
16 | +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
17 | +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL, | |
18 | +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
19 | +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
20 | +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
21 | +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 | +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 | +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 | +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE | |
25 | +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00. | |
26 | +# | |
27 | + | |
#-*-cmake-*-
# - Find GLEW
#
# Author : Nicholas Yue yue.nicholas@gmail.com
#
# This auxiliary CMake file helps to find the GLEW headers and libraries.
# The search is rooted at the GLEW_ROOT environment variable only
# (default and system search paths are disabled for the header lookup).
#
# GLEW_FOUND            set if GLEW is found.
# GLEW_INCLUDE_DIR      GLEW's include directory
# GLEW_GLEW_LIBRARY     GLEW library
# GLEW_GLEWmx_LIBRARY   GLEWmx library (Multiple Rendering Context)

# Provides find_package_handle_standard_args(); it is a helper module, not a
# package, so it is loaded with include() rather than find_package().
include(FindPackageHandleStandardArgs)

# GLEW_LOCATION is the installation root, i.e. the directory that contains include/GL/glew.h
find_path(GLEW_LOCATION include/GL/glew.h
  "$ENV{GLEW_ROOT}"
  NO_DEFAULT_PATH
  NO_SYSTEM_ENVIRONMENT_PATH
)

find_package_handle_standard_args(GLEW
  REQUIRED_VARS GLEW_LOCATION
)
51 | + | |
if(GLEW_LOCATION)

  set(GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path")

  # The suffix list is narrowed below to pick static or shared libraries;
  # remember the original value so it can be restored afterwards.
  set(ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})

  if(GLEW_USE_STATIC_LIBS)
    if(WIN32 AND NOT APPLE)
      # Static import libraries on Windows
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
      find_library(GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib)
      find_library(GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib)
    else()
      # macOS and the other Unix flavours share the same static lookup
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
      find_library(GLEW_LIBRARY_PATH GLEW
        PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_LIBRARY_PATH GLEWmx
        PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    endif()
  else()
    if(APPLE)
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
      find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib)
      find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib)
    elseif(WIN32)
      # Import libraries used at link time
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
      find_library(GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib)
      find_library(GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib)
      # DLLs loaded at run time
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
      find_library(GLEW_DLL_PATH GLEW32
        PATHS ${GLEW_LOCATION}/bin
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_DLL_PATH GLEW32mx
        PATHS ${GLEW_LOCATION}/bin
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    else()
      # Other Unix flavours: keep the default suffix list
      find_library(GLEW_LIBRARY_PATH GLEW
        PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_LIBRARY_PATH GLEWmx
        PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    endif()
  endif()

  # The suffix list MUST be restored so later find_library() calls are unaffected
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})

  # Publish the results under the names consumers are expected to use
  set(GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library")
  set(GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library")

endif()
1 | +++ a/FindGLUT.cmake | |
1 | +#.rst: | |
2 | +# FindGLUT | |
3 | +# -------- | |
4 | +# | |
5 | +# try to find glut library and include files. | |
6 | +# | |
7 | +# IMPORTED Targets | |
8 | +# ^^^^^^^^^^^^^^^^ | |
9 | +# | |
10 | +# This module defines the :prop_tgt:`IMPORTED` targets: | |
11 | +# | |
12 | +# ``GLUT::GLUT`` | |
13 | +# Defined if the system has GLUT. | |
14 | +# | |
15 | +# Result Variables | |
16 | +# ^^^^^^^^^^^^^^^^ | |
17 | +# | |
18 | +# This module sets the following variables: | |
19 | +# | |
20 | +# :: | |
21 | +# | |
22 | +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc. | |
23 | +# GLUT_LIBRARIES, the libraries to link against | |
24 | +# GLUT_FOUND, If false, do not try to use GLUT. | |
25 | +# | |
26 | +# Also defined, but not for general use are: | |
27 | +# | |
28 | +# :: | |
29 | +# | |
30 | +# GLUT_glut_LIBRARY = the full path to the glut library. | |
31 | +# GLUT_Xmu_LIBRARY = the full path to the Xmu library. | |
32 | +# GLUT_Xi_LIBRARY = the full path to the Xi Library. | |
33 | + | |
34 | +#============================================================================= | |
35 | +# Copyright 2001-2009 Kitware, Inc. | |
36 | +# | |
37 | +# Distributed under the OSI-approved BSD License (the "License"); | |
38 | +# see accompanying file Copyright.txt for details. | |
39 | +# | |
40 | +# This software is distributed WITHOUT ANY WARRANTY; without even the | |
41 | +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
42 | +# See the License for more information. | |
43 | +#============================================================================= | |
44 | +# (To distribute this file outside of CMake, substitute the full | |
45 | +# License text for the above reference.) | |
46 | + | |
if(WIN32)

  # Windows: headers and libraries are looked up below the GLUT_ROOT_PATH environment variable
  find_path(GLUT_INCLUDE_DIR NAMES GL/glut.h
    PATHS $ENV{GLUT_ROOT_PATH}/include)

  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    # 64-bit build: only freeglut from lib/x64 is accepted
    find_library(GLUT_glut_LIBRARY NAMES freeglut
      PATHS $ENV{GLUT_ROOT_PATH}/lib/x64
      NO_DEFAULT_PATH)
  else()
    find_library(GLUT_glut_LIBRARY NAMES glut glut32 freeglut
      PATHS
        ${OPENGL_LIBRARY_DIR}
        $ENV{GLUT_ROOT_PATH}/lib)
  endif()

elseif(APPLE)

  find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
  find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
  find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")

  if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
    add_library(GLUT::Cocoa UNKNOWN IMPORTED)
    # Cocoa should always be a framework, but check to make sure: for a
    # framework the imported location is the binary inside the bundle.
    if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
      set(_glut_cocoa_location "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
    else()
      set(_glut_cocoa_location "${GLUT_cocoa_LIBRARY}")
    endif()
    set_target_properties(GLUT::Cocoa PROPERTIES
      IMPORTED_LOCATION "${_glut_cocoa_location}")
    unset(_glut_cocoa_location)
  endif()

else()

  if(BEOS)
    # BeOS keeps GLUT in fixed locations; they are added as extra search paths below
    set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
    set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
  else()
    # Other Unix flavours: look for Xi and Xmu and wrap each one found in an imported target
    foreach(_glut_x11_lib Xi Xmu)
      find_library(GLUT_${_glut_x11_lib}_LIBRARY ${_glut_x11_lib}
        /usr/openwin/lib)
      if(GLUT_${_glut_x11_lib}_LIBRARY AND NOT TARGET GLUT::${_glut_x11_lib})
        add_library(GLUT::${_glut_x11_lib} UNKNOWN IMPORTED)
        set_target_properties(GLUT::${_glut_x11_lib} PROPERTIES
          IMPORTED_LOCATION "${GLUT_${_glut_x11_lib}_LIBRARY}")
      endif()
    endforeach()
    unset(_glut_x11_lib)
  endif()

  find_path(GLUT_INCLUDE_DIR GL/glut.h
    /usr/include/GL
    /usr/openwin/share/include
    /usr/openwin/include
    /opt/graphics/OpenGL/include
    /opt/graphics/OpenGL/contrib/libglut
    ${_GLUT_INC_DIR})

  find_library(GLUT_glut_LIBRARY glut
    /usr/openwin/lib
    ${_GLUT_glut_LIB_DIR})

  unset(_GLUT_INC_DIR)
  unset(_GLUT_glut_LIB_DIR)

endif()
135 | + | |
# find_package_handle_standard_args() is defined by the
# FindPackageHandleStandardArgs module; include it here so this file does not
# depend on some other find module having loaded it first.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)
137 | + | |
if(GLUT_FOUND)
  # Is -lXi and -lXmu required on all platforms that have it?
  # If not, we need some way to figure out what platform we are on.
  #
  # Xmu, Xi and Cocoa are optional: only the ones that were actually located
  # are added, so no "-NOTFOUND" entry can end up in GLUT_LIBRARIES.
  set(GLUT_LIBRARIES ${GLUT_glut_LIBRARY})
  foreach(_glut_optional GLUT_Xmu_LIBRARY GLUT_Xi_LIBRARY GLUT_cocoa_LIBRARY)
    if(${_glut_optional})
      list(APPEND GLUT_LIBRARIES ${${_glut_optional}})
    endif()
  endforeach()
  unset(_glut_optional)

  if(NOT TARGET GLUT::GLUT)
    add_library(GLUT::GLUT UNKNOWN IMPORTED)
    set_target_properties(GLUT::GLUT PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")

    # For a framework the imported location is the binary inside the bundle
    if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$")
      set_target_properties(GLUT::GLUT PROPERTIES
        IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
    else()
      set_target_properties(GLUT::GLUT PROPERTIES
        IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
    endif()

    # Forward whichever helper targets exist as link dependencies of GLUT::GLUT
    foreach(_glut_dep Xmu Xi Cocoa)
      if(TARGET GLUT::${_glut_dep})
        set_property(TARGET GLUT::GLUT APPEND
          PROPERTY INTERFACE_LINK_LIBRARIES GLUT::${_glut_dep})
      endif()
    endforeach()
    unset(_glut_dep)
  endif()

  # The following deprecated settings are for backwards compatibility with CMake 1.4
  set(GLUT_LIBRARY ${GLUT_LIBRARIES})
  set(GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
endif()

mark_as_advanced(
  GLUT_INCLUDE_DIR
  GLUT_glut_LIBRARY
  GLUT_Xmu_LIBRARY
  GLUT_Xi_LIBRARY
)
1 | +++ a/FindLAPACKE.cmake | |
# - Try to find LAPACKE
#
# Extra search locations: the LAPACK_PATH environment variable and, when set,
# the INCLUDE_INSTALL_DIR / LIB_INSTALL_DIR CMake variables.
#
# Once done this will define
#  LAPACKE_FOUND        - System has LAPACKE
#  LAPACKE_INCLUDE_DIR  - Directory containing lapacke.h
#  LAPACKE_INCLUDE_DIRS - The LAPACKE include directories
#  LAPACKE_LIBRARIES    - The libraries needed to use LAPACKE
#
# Usually, LAPACKE requires LAPACK and the BLAS. Both are searched for and
# added to LAPACKE_LIBRARIES when they are found, but this module does not
# enforce their presence: LAPACKE_FOUND only depends on LAPACKE itself.

find_path(LAPACKE_INCLUDE_DIR
  NAMES lapacke.h
  PATHS $ENV{LAPACK_PATH} ${INCLUDE_INSTALL_DIR}
  PATHS ENV INCLUDE)

find_library(LAPACKE_LIBRARY
  NAMES liblapacke lapacke
  PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  PATHS ENV LIBRARY_PATH
  PATHS ENV LD_LIBRARY_PATH)

# Supporting libraries, looked up the same way on every platform
find_library(LAPACK_LIBRARY
  NAMES liblapack lapack
  PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  PATHS ENV LIBRARY_PATH
  PATHS ENV LD_LIBRARY_PATH)

find_library(BLAS_LIBRARY
  NAMES libblas blas
  PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  PATHS ENV LIBRARY_PATH
  PATHS ENV LD_LIBRARY_PATH)

# LAPACKE first, then whichever supporting libraries were located; a missing
# one is skipped so that no "-NOTFOUND" entry reaches the link line.
set(LAPACKE_LIBRARIES ${LAPACKE_LIBRARY})
foreach(_lapacke_dep LAPACK_LIBRARY BLAS_LIBRARY)
  if(${_lapacke_dep})
    list(APPEND LAPACKE_LIBRARIES ${${_lapacke_dep}})
  endif()
endforeach()
unset(_lapacke_dep)

set(LAPACKE_INCLUDE_DIRS ${LAPACKE_INCLUDE_DIR})

# The found-check uses the individual LAPACKE results rather than the
# aggregated list, which could look "found" because of LAPACK/BLAS alone.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LAPACKE DEFAULT_MSG
  LAPACKE_LIBRARY
  LAPACKE_INCLUDE_DIR)
mark_as_advanced(LAPACKE_INCLUDE_DIR LAPACKE_LIBRARY LAPACK_LIBRARY BLAS_LIBRARY)
1 | +++ a/FindSTIM.cmake | |
# Finds the STIM library headers.
# Set the STIMLIB_PATH environment variable to the directory containing the
# stim subdirectory (the stim repository). Nothing is downloaded automatically.
#
# Defines
#  STIM_FOUND        - true when STIMLIB_PATH provides a usable value
#  STIM_INCLUDE_DIR  - the value of the STIMLIB_PATH environment variable
#  STIM_INCLUDE_DIRS - same as STIM_INCLUDE_DIR; only set when STIM was found

include(FindPackageHandleStandardArgs)

set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})

find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)

if(STIM_FOUND)
  set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
else()
  # Not found: tell the user how to point the build at the library
  message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
  message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
endif()
1 | +++ a/src/basic_functions.h | |
#include <stdio.h>

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <utility>
2 | + | |
3 | + | |
/// Returns the indices of `input` ordered so that the values they refer to are
/// in DESCENDING order (index of the largest value first).
///
/// @param input  array of `size` values; it is not modified
/// @param size   number of elements in `input`
/// @return       array of `size` indices allocated with malloc (the caller
///               releases it with free), or NULL when the allocation fails
///
/// Declared inline because the definition lives in a header.
inline size_t* sortIndx(float* input, size_t size){
    size_t* idx = (size_t*) malloc(size * sizeof(size_t));
    if (idx == NULL)
        return NULL;                                    //allocation failed (or size == 0 on some platforms)

    for (size_t i = 0; i < size; i++)                   //start from the identity permutation
        idx[i] = i;

    //exchange sort on the index array; the comparison and swap order are kept
    //exactly as they were so that the relative order of equal values does not change
    for (size_t i = 0; i < size; i++){
        for (size_t j = i + 1; j < size; j++){
            if (input[idx[i]] < input[idx[j]]){         //a larger value further right moves to the front
                std::swap(idx[i], idx[j]);
            }
        }
    }
    return idx;                                         //use as sortSIdx in selection
}
20 | + | |
21 | + | |
/// Matrix product M3 = M1 * M2 for dense matrices stored row by row
/// (element (i, j) of a matrix with c columns is at index i * c + j).
///
/// @param M3  output, r1 x c2 elements; must not overlap M1 or M2
/// @param M1  left operand, r1 x c1
/// @param M2  right operand, r2 x c2; c1 is used as the shared dimension
/// @param r1  rows of M1
/// @param c1  columns of M1
/// @param r2  rows of M2 (not used by the computation)
/// @param c2  columns of M2
template<typename T>
void mtxMul(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
    (void)r2;                                               //kept for interface compatibility only
    for(size_t i = 0; i < r1; i++){
        for(size_t j = 0; j < c2; j++){
            T temp = 0;
            for(size_t k = 0; k < c1; k++){                 //column1 = row2 for matrix multiplication
                temp += M1[i * c1 + k] * M2[k * c2 + j];    //accumulate one element of the output matrix
            }
            M3[i * c2 + j] = temp;                          //the output has c2 columns, so its row stride is c2
        }
    }
}
35 | + | |
/// Matrix product with a transposed right operand: M3 = M1 * transpose(M2).
/// All matrices are stored row by row (element (i, j) of a matrix with c
/// columns is at index i * c + j).
///
/// @param M3  output, r1 x r2 elements; must not overlap M1 or M2
/// @param M1  left operand, r1 x c1
/// @param M2  right operand before transposition, r2 x c2; c1 elements of each row are used
/// @param r1  rows of M1
/// @param c1  columns of M1
/// @param r2  rows of M2
/// @param c2  columns of M2
template<typename T>
void mtxMultranspose(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
    for(size_t i = 0; i < r1; i++){
        for(size_t j = 0; j < r2; j++){
            T temp = 0;
            for(size_t k = 0; k < c1; k++){                 //dot product of row i of M1 and row j of M2
                temp += M1[i * c1 + k] * M2[j * c2 + k];
            }
            M3[i * r2 + j] = temp;                          //the output has r2 columns, so its row stride is r2
        }
    }
}
49 | + | |
//Print the first f x f matrix stored in sw (e.g. a within class scatter matrix):
//a blank line, then one text line per matrix row with a space after every
//element, then a closing blank line.
template<typename T>
void displayS(T* sw, size_t f){
    std::cout<<std::endl;
    for(size_t row = 0; row < f; row++){
        const T* line = sw + row * f;               //first element of the current row
        for(size_t col = 0; col < f; col++)
            std::cout<<line[col]<<" ";
        std::cout<<std::endl;
    }
    std::cout<<std::endl;
}
65 | + | |
/// Returns the indices of `eigenvalue` ordered so that the values they refer
/// to are in ASCENDING order (index of the smallest value first), and prints
/// the value found at the first sorted position.
///
/// @param eigenvalue  array of N eigenvalues (e.g. from LAPACKE); not modified
/// @param N           number of eigenvalues
/// @return            array of N indices allocated with malloc (the caller
///                    releases it with free), or NULL when the allocation fails
///
/// Declared inline because the definition lives in a header.
inline size_t* sortEigenVectorIndx(float* eigenvalue, size_t N){
    size_t* idx = (size_t*) malloc(N * sizeof(size_t));
    if (idx == NULL)
        return NULL;                                            //allocation failed (or N == 0 on some platforms)

    for (size_t i = 0; i < N; i++)                              //start from the identity permutation
        idx[i] = i;

    //exchange sort on the index array; comparison and swap order are unchanged
    for (size_t i = 0; i < N; i++){
        for (size_t j = i + 1; j < N; j++){
            if (eigenvalue[idx[i]] > eigenvalue[idx[j]]){       //a smaller value further right moves to the front
                std::swap(idx[i], idx[j]);
            }
        }
    }

    //NOTE(review): the label says "index" but the eigenvalue itself is printed - confirm which one is intended
    if (N > 0)                                                  //idx[0] does not exist for an empty input
        std::cout<<"best eigenvalue index: "<<eigenvalue[idx[0]]<<std::endl;

    return idx;                                                 //use as sortSIdx in selection
}
1 | +++ a/src/enviload.h | |
1 | +#include <iostream> | |
2 | +#include <fstream> | |
3 | +#include <thread> | |
4 | +#include <random> | |
5 | +#include <vector> | |
6 | +//#include <algorithm> | |
7 | + | |
8 | +#define NOMINMAX | |
9 | + | |
10 | +//stim libraries | |
11 | +#include <stim/envi/envi.h> | |
12 | +#include <stim/image/image.h> | |
13 | +#include <stim/parser/arguments.h> | |
14 | +#include <stim/ui/progressbar.h> | |
15 | +#include <stim/parser/filename.h> | |
16 | +//#include <stim/visualization/colormap.h> | |
17 | +#include <stim/parser/table.h> | |
18 | + | |
19 | +std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel# | |
20 | +//loads spectral features into a feature matrix based on a set of class images (or masks) | |
21 | +float* load_features(size_t nC, size_t tP, size_t B, stim::envi E, std::vector< unsigned int > nP){ | |
22 | + float progress = 0; //initialize the progress bar variable | |
23 | + unsigned long long bytes_fmat = sizeof(float) * tP * B; //calculate the number of bytes in the feature matrix | |
24 | + std::cout<<"totalnumber of samples "<<tP<<std::endl; | |
25 | + std::cout<<"Allocating space for the feature matrix: "<<tP<<" x "<<B<<" = "<<(float)bytes_fmat/(float)1048576<<"MB"<<std::endl; | |
26 | + float* F = (float*) malloc(bytes_fmat); //allocate space for the sifted matrix | |
27 | + std::cout<<"Loading Training Data ("<<nC<<" classes)"<<std::endl; | |
28 | + //load all of the training spectra into an array | |
29 | + unsigned long long F_idx = 0; //initialize the matrix index to 0 | |
30 | + //unsigned long long R_idx = 0; | |
31 | + for(unsigned c = 0; c < nC; c++){ //for each class image | |
32 | + std::cout<<"\tSifting class "<<c+1<<" = "<<nP[c]<<" pixels..."<<std::endl; | |
33 | + // std::thread t1 = std::thread(progress_thread_envi, &E); //start the progress bar thread | |
34 | + E.sift((void*)&F[F_idx], C[c].data(), true); //sift that class into the matrix at the proper location | |
35 | + F_idx += nP[c] * B; | |
36 | + progress = (float)(c+1) / (float)nC * 100; | |
37 | + // t1.join(); | |
38 | + } | |
39 | + | |
40 | + return F; | |
41 | +} | |
42 | + | |
/// Builds the array of class labels that matches the layout of the feature
/// matrix: the first nP[0] entries are 1, the next nP[1] entries are 2, and so on.
///
/// @param tP  total number of pixels (size of the returned array)
/// @param nC  number of classes
/// @param nP  nP[c] is the number of pixels of class c
/// @return    malloc'ed array of tP labels; the caller releases it with free
unsigned int* ga_load_responses(size_t tP, size_t nC, std::vector< unsigned int > nP){
    unsigned int* labels = (unsigned int*)malloc(tP*sizeof(unsigned int));
    size_t first = 0;                                           //position of the first pixel of the current class
    for(size_t cls = 0; cls < nC; cls++){
        const unsigned int label = (unsigned int)cls+1;        //labels are one-based
        const unsigned int count = nP[cls];
        for(unsigned long long n = 0; n < count; n++)
            labels[first + n] = label;
        first += count;                                         //the next class starts after this one
    }
    return labels;
}
55 | + | |
56 | + | |
57 | +//loads the necessary data for training a random forest classifier | |
58 | +std::vector< unsigned int > ga_load_class_images(int argc, stim::arglist args, size_t* nC, size_t* tP){ | |
59 | + if(args["classes"].nargs() < 2){ //if fewer than two classes are specified, there's a problem | |
60 | + std::cout<<"ERROR: training requires at least two class masks"<<std::endl; | |
61 | + exit(1); | |
62 | + } | |
63 | + std::vector< unsigned int > nP; | |
64 | + size_t num_images = args["classes"].nargs(); //count the number of class images | |
65 | + //size_t num_images = args["rf"].nargs(); //count the number of class images | |
66 | + //std::vector<std::string> filenames(num_images); //initialize an array of file names to store the names of the images | |
67 | + std::string filename; //allocate space to store the filename for an image | |
68 | + for(size_t c = 0; c < num_images; c++){ //for each image | |
69 | + filename = args["classes"].as_string(c);; //get the class image file name | |
70 | + stim::image<unsigned char> image(filename); //load the image | |
71 | + //push_training_image(image.channel(0), nC, tP, nP); //push channel zero (all class images are assumed to be single channel) | |
72 | + C.push_back(image.channel(0)); | |
73 | + unsigned int npixels = (unsigned int)image.channel(0).nnz(); | |
74 | + nP.push_back(npixels); //push the number of pixels onto the pixel array | |
75 | + *tP += npixels; //add to the running total of pixels | |
76 | + *nC = *nC + 1; | |
77 | + } | |
78 | + | |
79 | + return nP; | |
80 | +} | |
81 | + | |
//Debug output: prints the class label of pixel Idx followed by all B feature
//values of that pixel (row Idx of the feature matrix F, B values per row).
void display_PixelfeatureNclass(float* F, unsigned int* T, size_t B, size_t Idx){
    const float* pixel = F + Idx * B;               //first feature of pixel Idx
    std::cout<<"class of pixel["<<Idx<<"]" <<"is: "<<T[Idx]<<std::endl;
    std::cout<<"feature["<<Idx<<"] is: "<<std::endl;
    for(size_t band = 0; band < B; band++)
        std::cout<<" "<<pixel[band];
}
89 | + | |
90 | + | |
91 | +void display_args(int argc, stim::arglist args){ | |
92 | + std::cout<<"number of arguments "<<argc<<std::endl; | |
93 | + std::cout<<"arg 0 "<<args.arg(0)<<std::endl; | |
94 | + std::cout<<"arg 1 "<<args.arg(1)<<std::endl; | |
95 | +} | |
96 | + | |
//Debug output: prints the number of samples (X * Y) and the number of bands B.
void display_dataSize(size_t X, size_t Y, size_t B){
    const size_t samples = X * Y;
    std::cout<<"number of samples "<<samples<<std::endl;
    std::cout<<"number of bands "<<B<<std::endl;
}
102 | + | |
//Debug output for pixel j of the feature matrix built for gnome i.
//P holds f feature indices per gnome; phe holds p pixels of f values per gnome.
//Each printed line pairs one feature index of the gnome with the matching pixel value.
void display_phe(float* phe, unsigned int* P, size_t p,size_t f, size_t i, size_t j){
    const unsigned int* gnome = P + i * f;                  //feature indices of gnome i
    const float* pixel = phe + i * (p * f) + j * f;         //values of pixel j for gnome i
    std::cout<<"phe["<<i<<"]["<<j<<"]"<<std::endl;
    for(unsigned int n = 0; n < f; n++)
        std::cout<<gnome[n]<<" "<<pixel[n]<<std::endl;
}
112 | + | |
113 | + | |
//Debug output: prints the f feature indices of gnome gIdx of the population P,
//each preceded by a space and without a trailing newline.
void display_gnome(unsigned int* P,size_t f,size_t gIdx){
    const unsigned int* gnome = P + gIdx * f;       //first feature index of the gnome
    for(size_t n = 0; n < f; n++)
        std::cout<<" "<<gnome[n];
}
119 | + | ... | ... |
1 | +++ a/src/ga_gpu.cu | |
1 | +#ifndef GA_GPU_CU | |
2 | +#define GA_GPU_CU | |
3 | + | |
4 | +//#include <cuda.h> | |
5 | +//#include "cuda_runtime.h" | |
6 | +//#include <cuda_runtime_api.h> | |
7 | +//#include "device_launch_parameters.h" | |
8 | +#include <stim/cuda/cudatools/error.h> | |
9 | + | |
10 | +#include "timer.h" | |
11 | +//#include <stdio.h> | |
12 | +//#include <stdlib.h> | |
13 | +#include <iostream> | |
14 | +#include <fstream> | |
15 | + | |
16 | +extern Timer timer; | |
17 | + | |
18 | + | |
//Accumulate the between class scatter of every gnome into gpuSb (p matrices of f x f).
//One thread handles one matrix element: x and y select the two feature
//positions inside the gnome, z selects the gnome.
//gpuP holds f feature indices per gnome, gpuM one value per original feature,
//gpuCM ub values per class, and gpu_nPxInCls one pixel count per class.
//The kernel adds to gpuSb, so the existing contents of gpuSb are part of the result.
__global__ void kernel_computeSb(float* gpuSb, unsigned int* gpuP, float* gpuM, float* gpuCM, size_t ub, size_t f, size_t p, size_t nC, unsigned int* gpu_nPxInCls){

    size_t col = blockIdx.x * blockDim.x + threadIdx.x;        //first feature position inside the gnome
    size_t row = blockIdx.y * blockDim.y + threadIdx.y;        //second feature position inside the gnome
    size_t gnome = blockIdx.z * blockDim.z + threadIdx.z;      //gnome index in the population

    if(gnome >= p || col >= f || row >= f) return;              //threads outside the matrices do nothing

    size_t featCol = gpuP[gnome * f + col];                     //actual feature index in the original feature matrix
    size_t featRow = gpuP[gnome * f + row];                     //actual feature index in the original feature matrix
    size_t out = gnome * f * f + row * f + col;                 //element of gpuSb owned by this thread

    for(size_t c = 0; c < nC; c++){
        size_t classIndx = c * ub;                              //start of class c in the class mean matrix
        float term = ((gpuCM[classIndx + featCol] - gpuM[featCol]) * (gpuCM[classIndx + featRow] - gpuM[featRow])) * (float)gpu_nPxInCls[c];
        gpuSb[out] += term;                                     //weighted by the number of pixels in class c
    }
}
44 | + | |
45 | + | |
/// CUDA kernel: adds the within-class scatter of every class to gpuSw.
///
/// Thread mapping (from the index math below): x -> feature position i inside the gnome,
/// y -> feature position j inside the gnome, z -> gnome index. Every in-range thread updates
/// exactly one element, gpuSw[gnomeIndx * f * f + j * f + i].
///
/// @param gpuSw scatter matrices, indexed as (gnome, j, i) with f x f floats per gnome; accumulated into
/// @param gpuP  population, f feature indices per gnome
/// @param gpuCM class means, row c starts at c * ub and is indexed by feature index
/// @param gpuF  feature matrix, row k (pixel) starts at k * ub
/// @param gpuT  class label of each pixel; pixel k belongs to class c when gpuT[k] == c + 1
/// @param ub    row stride of gpuF and gpuCM
/// @param f     number of features per gnome
/// @param p     number of gnomes
/// @param nC    number of classes
/// @param tP    number of pixels (rows of gpuF, entries of gpuT)
__global__ void kernel_computeSw(float* gpuSw, unsigned int* gpuP, float* gpuCM, float* gpuF, unsigned int* gpuT, size_t ub, size_t f, size_t p, size_t nC, size_t tP){
	size_t i = blockIdx.x * blockDim.x + threadIdx.x;				//first feature position inside the gnome
	size_t j = blockIdx.y * blockDim.y + threadIdx.y;				//second feature position inside the gnome
	size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z;		//gnome index in the population matrix

	if(gnomeIndx >= p || i >= f || j >= f) return;					//threads outside the p x f x f volume have nothing to do

	const size_t n1 = gpuP[gnomeIndx * f + i];						//actual feature index in original feature matrix
	const size_t n2 = gpuP[gnomeIndx * f + j];						//actual feature index in original feature matrix

	//accumulate in a register, starting from the value already stored, and write back once
	const size_t outIdx = gnomeIndx * f * f + j * f + i;
	float acc = gpuSw[outIdx];

	for(size_t c = 0; c < nC; c++){
		//class means of the two features are constant over the pixel loop
		const float mean1 = gpuCM[c * ub + n1];
		const float mean2 = gpuCM[c * ub + n2];

		float classSum = 0;											//scatter contribution of class c
		for(size_t k = 0; k < tP; k++){
			if(gpuT[k] == (c+1) ){									//labels are 1-based
				classSum += ((gpuF[k * ub + n1] - mean1) * (gpuF[k * ub + n2] - mean2));
			}
		}
		acc += classSum;
	}

	gpuSw[outIdx] = acc;
}
68 | + | |
69 | + | |
70 | + | |
71 | + | |
72 | + //=============================gpu intialization============================================= | |
73 | + /// Initialize all GPU pointers used in the GA-GPU algorithm | |
74 | + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population | |
75 | + /// @param p is the population size | |
76 | + /// @param f is the number of desired features | |
77 | + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean | |
78 | + /// @param cpuM is a pointer to the class mean on the CPU | |
79 | + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class | |
80 | + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class | |
81 | + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices | |
82 | + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices | |
83 | + /// @param gpuF is the destination for the GPU feature matrix | |
84 | + /// @param cpuF is the complete feature matrix on the CPU | |
85 | + | |
86 | + void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation | |
87 | + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub, | |
88 | + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls, | |
89 | + float** gpuSb, float** gpuSw, | |
90 | + float** gpuF, float* cpuF, | |
91 | + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls){ | |
92 | + | |
93 | + HANDLE_ERROR(cudaMalloc(gpuP, p * f * sizeof(unsigned int))); //allocate space for the population on the GPU | |
94 | + | |
95 | + HANDLE_ERROR(cudaMalloc(gpuCM, nC * ub * sizeof(float))); //allocate space for the class mean and copy it to the GPU | |
96 | + HANDLE_ERROR(cudaMemcpy(*gpuCM, cpuCM, nC * ub * sizeof(float), cudaMemcpyHostToDevice)); | |
97 | + | |
98 | + | |
99 | + HANDLE_ERROR(cudaMalloc(gpuM, ub * sizeof(float))); //allocate space for the mean of the feature matrix | |
100 | + HANDLE_ERROR(cudaMemcpy(*gpuM, cpuM, ub * sizeof(float), cudaMemcpyHostToDevice)); | |
101 | + | |
102 | + HANDLE_ERROR(cudaMalloc(gpu_nPxInCls, nC * sizeof(unsigned int))); //number of pixels in each class | |
103 | + HANDLE_ERROR(cudaMemcpy(*gpu_nPxInCls, cpu_nPxInCls, nC * sizeof(unsigned int), cudaMemcpyHostToDevice)); | |
104 | + | |
105 | + | |
106 | + HANDLE_ERROR(cudaMalloc(gpuSb, p * f * f * sizeof(float))); //allocate memory for sb which is calculated for eery class separately and added together in different kernel | |
107 | + HANDLE_ERROR(cudaMalloc(gpuSw, p * f * f * sizeof(float))); | |
108 | + | |
109 | + HANDLE_ERROR(cudaMalloc(gpuF, tP * ub * sizeof(float))); | |
110 | + HANDLE_ERROR(cudaMemcpy(*gpuF, cpuF, tP * ub * sizeof(float), cudaMemcpyHostToDevice)); | |
111 | + | |
112 | + HANDLE_ERROR(cudaMalloc(gpuT, tP * sizeof(unsigned int))); | |
113 | + HANDLE_ERROR(cudaMemcpy(*gpuT, cpuT, tP* sizeof(unsigned int), cudaMemcpyHostToDevice)); | |
114 | + | |
115 | + } | |
116 | + | |
117 | + //computation on GPU | |
118 | + /// Initialize all GPU pointers used in the GA-GPU algorithm | |
119 | + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population | |
120 | + /// @param p is the population size | |
121 | + /// @param f is the number of desired features | |
122 | + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices | |
123 | + /// @param cpuSb is the between-class scatter matrix on the GPU (this function will copy the GPU result there) | |
124 | + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices | |
125 | + /// @param cpuSw is the within-class scatter matrix on the GPU (this function will copy the GPU result there) | |
126 | + | |
127 | + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean | |
128 | + /// @param cpuM is a pointer to the class mean on the CPU | |
129 | + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class | |
130 | + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class | |
131 | + | |
132 | + /// @param gpuF is the destination for the GPU feature matrix | |
133 | + /// @param cpuF is the complete feature matrix on the CPU | |
134 | + void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f, | |
135 | + float* gpuSb, float* cpuSb, | |
136 | + float* gpuSw, float* cpuSw, | |
137 | + float* gpuF, unsigned int* gpuT,float* gpuM, float* gpuCM, | |
138 | + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile){ | |
139 | + | |
140 | + timer.start(); | |
141 | + HANDLE_ERROR(cudaMemcpy(gpuP, cpuP, p * f * sizeof(unsigned int), cudaMemcpyHostToDevice)); | |
142 | + HANDLE_ERROR(cudaMemset(gpuSb, 0, p * f * f * sizeof(float))); | |
143 | + | |
144 | + //grid configuration of GPU | |
145 | + size_t threads = (size_t)sqrt(props.maxThreadsPerBlock); | |
146 | + if(threads > f) threads = f; | |
147 | + size_t numberofblocksfor_f = (size_t)ceil((float)f/ threads); | |
148 | + dim3 blockdim((int)threads, (int)threads, 1); | |
149 | + dim3 griddim((int)numberofblocksfor_f, (int)numberofblocksfor_f, (int)p); //X dimension blocks will cover all gnomes of the population and each block will have as many gnomes as it can feet | |
150 | + //sharedbytes calculation | |
151 | + size_t sharedBytes = p * f * sizeof(unsigned int); //copy population to shared memory | |
152 | + if(props.sharedMemPerBlock < sharedBytes) sharedBytes = props.sharedMemPerBlock; | |
153 | + | |
154 | + //launch kernel to compute sb matrix | |
155 | + kernel_computeSb<<<griddim, blockdim, sharedBytes>>>(gpuSb, gpuP, gpuM, gpuCM, ub, f, p, nC, gpu_nPxInCls); | |
156 | + cudaDeviceSynchronize(); | |
157 | + | |
158 | + HANDLE_ERROR(cudaMemcpy(cpuSb, gpuSb, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); //copy between class scatter from gpu to cpu | |
159 | + const auto elapsedg1 = timer.time_elapsed(); | |
160 | + if(gen > gnrtn -2){ | |
161 | + std::cout << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl; | |
162 | + profilefile << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl; | |
163 | + } | |
164 | + | |
165 | + timer.start(); | |
166 | + //Compute within class scatter | |
167 | + HANDLE_ERROR(cudaMemset(gpuSw, 0, p * f * f * sizeof(float))); | |
168 | + | |
169 | + //launch kernel to compute sb matrix | |
170 | + kernel_computeSw<<<griddim, blockdim>>>(gpuSw, gpuP, gpuCM, gpuF, gpuT, ub, f, p, nC, tP); | |
171 | + cudaDeviceSynchronize(); | |
172 | + //copy between class scatter from gpu to cpu | |
173 | + HANDLE_ERROR(cudaMemcpy(cpuSw, gpuSw, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); | |
174 | + const auto elapsedg2 = timer.time_elapsed(); | |
175 | + if(gen > gnrtn - 2){ | |
176 | + std::cout << "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl; | |
177 | + profilefile<< "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl; | |
178 | + } | |
179 | + | |
180 | + } | |
181 | + | |
182 | + //free all gpu pointers | |
183 | + void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT){ | |
184 | + | |
185 | + HANDLE_ERROR(cudaFree(gpuP)); | |
186 | + HANDLE_ERROR(cudaFree(gpuCM)); | |
187 | + HANDLE_ERROR(cudaFree(gpuM)); | |
188 | + HANDLE_ERROR(cudaFree(gpu_nPxInCls)); | |
189 | + HANDLE_ERROR(cudaFree(gpuSb)); | |
190 | + HANDLE_ERROR(cudaFree(gpuSw)); | |
191 | + HANDLE_ERROR(cudaFree(gpuF)); | |
192 | + HANDLE_ERROR(cudaFree(gpuT)); | |
193 | + } | |
194 | + | |
195 | +#endif | |
196 | + | ... | ... |
1 | +++ a/src/ga_gpu.h | |
1 | +#ifndef GA_GPU_H | |
2 | +#define GA_GPU_H | |
3 | + | |
#include <algorithm>
#include <complex>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <thread>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <cv.h>
10 | +#include <iostream> | |
11 | + | |
12 | +#include "timer.h" | |
13 | + | |
14 | +#include "basic_functions.h" | |
15 | +//LAPACKE support for Visual Studio | |
16 | + | |
17 | +#ifndef LAPACK_COMPLEX_CUSTOM | |
18 | +#define LAPACK_COMPLEX_CUSTOM | |
19 | +#define lapack_complex_float std::complex<float> | |
20 | +#define lapack_complex_double std::complex<double> | |
21 | +#include "lapacke.h" | |
22 | +#endif | |
23 | + | |
24 | + | |
25 | +#define LAPACK_ROW_MAJOR 101 | |
26 | +#define LAPACK_COL_MAJOR 102 | |
27 | + | |
28 | +//CUDA functions | |
29 | +void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation | |
30 | + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub, | |
31 | + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls, | |
32 | + float** gpuSb, float** gpuSw, | |
33 | + float** gpuF, float* cpuF, | |
34 | + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls); | |
35 | +void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f, | |
36 | + float* gpuSb, float* cpuSb, | |
37 | + float* gpuSw, float* cpuSw, | |
38 | + float* gpuF, unsigned int* T, float* gpuM, float* gpuCM, | |
39 | + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile); | |
40 | +void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT); | |
41 | + | |
/// Plain single-precision complex value: `re` is the real part, `im` the imaginary part.
/// Available both as `struct _fcomplex` and through the alias `fcomplex`.
typedef struct _fcomplex {
	float re;
	float im;
} fcomplex;
44 | + | |
45 | +Timer timer; | |
46 | + | |
47 | +class ga_gpu { | |
48 | + | |
49 | +public: | |
50 | + float* F; //pointer to the raw data in host memory | |
51 | + unsigned int* T; //pointer to the class labels in host memory | |
52 | + size_t gnrtn; //total number of generations | |
53 | + size_t p; //population size | |
54 | + size_t f; // number of features to be selected | |
55 | + | |
56 | + unsigned int* P; //pointer to population of current generation genotype matrix (p x f) | |
57 | + float* S; //pointer to score(fitness value) of each gnome from current population matric P | |
58 | + unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population | |
59 | + unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F) | |
60 | + unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0) | |
61 | + float uniformRate; | |
62 | + float mutationRate; | |
63 | + size_t tournamentSize; //number of potential gnomes to select parent for crossover | |
64 | + bool elitism; //if true then passes best gnome to next generation | |
65 | + | |
66 | + //declare gpu pointers | |
67 | + float* gpuF; //Feature matrix | |
68 | + unsigned int* gpuT; //target responses of entire feature matrix | |
69 | + unsigned int* gpuP; //population matrix | |
70 | + unsigned int* gpu_nPxInCls; | |
71 | + float* gpuCM; //class mean of entire feature matrix | |
72 | + float* gpuM; //total mean of entire feature matrix | |
73 | + float* gpuSb; //between class scatter for all individuals of current population | |
74 | + float* gpuSw; //within class scatter for all individuals of current population | |
75 | + | |
76 | + //constructor | |
77 | + ga_gpu() {} | |
78 | + | |
79 | + //==============================generate initial population | |
80 | + | |
81 | + void initialize_population(std::vector<unsigned int> i_guess, bool debug) { | |
82 | + if (debug) { | |
83 | + std::cout << std::endl; | |
84 | + std::cout << "initial populatyion is: " << std::endl; | |
85 | + } | |
86 | + | |
87 | + lb = 0; | |
88 | + P = (unsigned int*)calloc(p * f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f | |
89 | + S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P | |
90 | + | |
91 | + srand(1); | |
92 | + //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess | |
93 | + std::memcpy(P, i_guess.data(), f * sizeof(unsigned int)); | |
94 | + | |
95 | + //generate random initial population | |
96 | + for (size_t i1 = 1; i1 < p; i1++) { | |
97 | + for (size_t i2 = 0; i2 < f; i2++) { | |
98 | + P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B) | |
99 | + if (debug) std::cout << P[i1 * f + i2] << "\t"; | |
100 | + } | |
101 | + if (debug) std::cout << std::endl; | |
102 | + } | |
103 | + } | |
104 | + | |
105 | + //===================generation of new population========================================== | |
106 | + | |
107 | + size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) { | |
108 | + | |
109 | + //gget index of best gnome in the current population | |
110 | + size_t bestG_Indx = gIdxbestGnome(); | |
111 | + //-------------(reproduction)------- | |
112 | + if (elitism) { | |
113 | + saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation | |
114 | + } | |
115 | + // ------------Crossover population--------------- | |
116 | + int elitismOffset; | |
117 | + if (elitism) { | |
118 | + elitismOffset = 1; | |
119 | + } | |
120 | + else { | |
121 | + elitismOffset = 0; | |
122 | + } | |
123 | + | |
124 | + //Do crossover for rest of population size | |
125 | + for (int i = elitismOffset; i <p; i++) { | |
126 | + // std::cout<<"crossover of gnome "<<i<<std::endl; | |
127 | + std::vector<unsigned int>gnome1; | |
128 | + gnome1.reserve(f); | |
129 | + gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes | |
130 | + // displaygnome(gnome1); | |
131 | + std::vector<unsigned int>gnome2; | |
132 | + gnome2.reserve(f); | |
133 | + gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes | |
134 | + // displaygnome(gnome2); | |
135 | + std::vector<unsigned int>gnome; | |
136 | + gnome.reserve(f); | |
137 | + gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome | |
138 | + // displaygnome(gnome); | |
139 | + saveGnome(i, gnome, newPop); //save crosseover result to new population | |
140 | + } | |
141 | + | |
142 | + //--------------Mutate population------------ | |
143 | + // introduce some mutation in new population | |
144 | + for (int i = elitismOffset; i <p; i++) { | |
145 | + //std::cout<<"mutation of gnome"<<std::endl; | |
146 | + std::vector<unsigned int>gnome; | |
147 | + gnome.reserve(f); | |
148 | + | |
149 | + for (size_t n = 0; n < f; n++) | |
150 | + gnome.push_back(newPop[i*f + n]); | |
151 | + //std::cout<<"\n starting address "<<(&newPop[0] + i*f)<<"\t end address is "<<(&newPop[0] + i*f + f-1) <<std::endl; | |
152 | + //std::copy((&newPop[0] + i*f), (&newPop[0] + i*f +f-1), gnome.begin()); | |
153 | + // displaygnome(gnome); | |
154 | + mutate(gnome); | |
155 | + // displaygnome(gnome); | |
156 | + saveGnome(i, gnome, newPop); //save new gnome to new population at position i | |
157 | + } | |
158 | + return bestG_Indx; | |
159 | + } | |
160 | + | |
161 | + //============================== functions for population evolution =========================================================================== | |
162 | + std::vector<unsigned int> tournamentSelection(size_t tSize) { | |
163 | + // Create a tournament population | |
164 | + unsigned int* tournamentP = (unsigned int*)malloc(tSize * f * sizeof(unsigned int)); | |
165 | + std::vector<float>tournamentS; | |
166 | + | |
167 | + // For each place in the tournament get a random individual | |
168 | + for (size_t i = 0; i < tSize; i++) { | |
169 | + size_t rndmIdx = rand() % p + lb; | |
170 | + tournamentS.push_back(S[rndmIdx]); | |
171 | + //for (size_t n = 0; n <f; n++) | |
172 | + //tournamentP[i * f + n] = (getGnome(rndmIdx)).at(n); | |
173 | + std::vector<unsigned int> temp_g(getGnome(rndmIdx)); | |
174 | + std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f); | |
175 | + } | |
176 | + // Get the fittest | |
177 | + std::vector<unsigned int>fittestgnome; | |
178 | + fittestgnome.reserve(f); | |
179 | + | |
180 | + //select index of best gnome from fitness score | |
181 | + size_t bestSIdx = 0; | |
182 | + for (size_t i = 0; i < tSize; i++) { | |
183 | + if (tournamentS[i] < tournamentS[bestSIdx]) | |
184 | + bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error | |
185 | + } | |
186 | + | |
187 | + for (size_t n = 0; n < f; n++) | |
188 | + fittestgnome.push_back(tournamentP[bestSIdx * f + n]); | |
189 | + return fittestgnome; | |
190 | + } //end of tournament selection | |
191 | + | |
192 | + | |
193 | + std::vector<unsigned int> crossover(std::vector<unsigned int> gnome1, std::vector<unsigned int> gnome2, float* M) { | |
194 | + std::vector<unsigned int> gnome; | |
195 | + for (size_t i = 0; i < f; i++) { | |
196 | + // Crossover | |
197 | + float r = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); | |
198 | + if (r <= uniformRate) { | |
199 | + gnome.push_back(gnome1.at(i)); | |
200 | + } | |
201 | + else { | |
202 | + gnome.push_back(gnome2.at(i)); | |
203 | + } | |
204 | + } | |
205 | + | |
206 | + //check new gnome for all zero bands and duplicated values | |
207 | + std::vector<unsigned int> gnomeunique; | |
208 | + int flag = 0; | |
209 | + std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 | |
210 | + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); | |
211 | + /* if(gnomeunique.size()< gnome.size()){ | |
212 | + flag = 1; | |
213 | + std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl; | |
214 | + }*/ | |
215 | + unsigned int featureband, featureband1, featureband2; | |
216 | + if (gnomeunique.size() < f) { | |
217 | + for (size_t k = gnomeunique.size(); k < f; k++) { | |
218 | + featureband = rand() % ub + lb; | |
219 | + for (size_t i = 0; i < f; i++) { | |
220 | + featureband1 = gnome1.at(i); | |
221 | + featureband2 = gnome2.at(i); | |
222 | + for (size_t j = 0; j < gnomeunique.size(); j++) { | |
223 | + if (gnomeunique.at(j) != featureband1) { | |
224 | + featureband = featureband1; | |
225 | + } | |
226 | + else if (gnomeunique.at(j) != featureband2) { | |
227 | + featureband = featureband2; | |
228 | + } | |
229 | + else if (gnomeunique.at(j) == featureband) { | |
230 | + featureband = rand() % ub + lb; | |
231 | + while (M[featureband] == 0) { | |
232 | + featureband = rand() % ub + lb; | |
233 | + } | |
234 | + } | |
235 | + } | |
236 | + } | |
237 | + gnomeunique.push_back(featureband); | |
238 | + } | |
239 | + } | |
240 | + //if(flag ==1){ | |
241 | + // std::cout<<"\n original gnome "<<g<<" are "<<std::endl; | |
242 | + // for(int k = 0; k < gnome.size(); k++) | |
243 | + // std::cout<<gnome[k]<<"\t"; | |
244 | + // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl; | |
245 | + // for(int k = 0; k < gnomeunique.size(); k++) | |
246 | + // std::cout<<gnomeunique[k]<<"\t"; | |
247 | + //} | |
248 | + | |
249 | + return gnomeunique; | |
250 | + } | |
251 | + | |
252 | + void mutate(std::vector<unsigned int> gnome) { | |
253 | + for (size_t i = 0; i < f; i++) { | |
254 | + float LO = (float)0.01; | |
255 | + float HI = 1; | |
256 | + float r3 = LO + static_cast <float> (rand()) / (static_cast <float> (RAND_MAX / (HI - LO))); | |
257 | + //if random value is less than mutationRate then mutate this gnome | |
258 | + if (r3 <= mutationRate) { | |
259 | + gnome.at(i) = (rand() % ub + lb); | |
260 | + gnome.push_back(rand() % ub + lb); | |
261 | + } | |
262 | + } | |
263 | + } | |
264 | + | |
265 | + ///returns gnome of given index | |
266 | + std::vector<unsigned int> getGnome(size_t idx) { | |
267 | + std::vector<unsigned int> gnome; | |
268 | + gnome.reserve(f); | |
269 | + //pulling gnome idx from population P | |
270 | + for (size_t n = 0; n < f; n++) | |
271 | + gnome.push_back(P[idx * f + n]); | |
272 | + //memcpy(&gnome[0], P+idx*f, f*sizeof(size_t)); | |
273 | + return gnome; | |
274 | + } | |
275 | + | |
276 | + //save gnome of index gIdx from previous population at position i in the new population | |
277 | + void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) { | |
278 | + for (size_t n = 0; n < f; n++) | |
279 | + newPop[i * f + n] = P[gIdx * f + n]; | |
280 | + } | |
281 | + | |
282 | + void saveGnome(size_t idx, std::vector<unsigned int>gnome, unsigned int* newPop) { | |
283 | + std::copy(gnome.begin(), gnome.end(), newPop + idx*f); | |
284 | + } | |
285 | + | |
286 | + size_t gIdxbestGnome() { | |
287 | + //std::cout<<"best gnome indes is: "<<sortSIndx()[0]; | |
288 | + return sortSIndx()[0]; | |
289 | + } | |
290 | + | |
291 | + void displaygnome(std::vector<unsigned int> gnome) { | |
292 | + std::cout << "\t gnome: "; | |
293 | + for (int i = 0; i<gnome.size(); ++i) | |
294 | + std::cout << gnome[i] << ' '; | |
295 | + std::cout << std::endl; | |
296 | + } | |
297 | + | |
298 | + //---------------------post processing of score------------------------------------- | |
299 | + void Snorm() { //normalize gnome scores | |
300 | + double s; | |
301 | + for (size_t i = 0; i < p; i++) { | |
302 | + s += S[i]; //sum of all gnome score in population | |
303 | + } | |
304 | + //std::cout<<"mean Score is: "<<(double) s/p; | |
305 | + for (size_t i = 0; i <p; i++) | |
306 | + S[i] = S[i] / s; | |
307 | + } | |
308 | + | |
309 | + size_t* sortSIndx() { //sort gnome index according to gnome scores | |
310 | + //sort indices of score in ascending order (fitness value) | |
311 | + size_t *idx = (size_t*)malloc(p * sizeof(size_t)); //array to hold sorted gnome index | |
312 | + for (size_t i = 0; i < p; i++) { //initialize index array from 1 to p(population size) in an ascending order | |
313 | + idx[i] = i; | |
314 | + } | |
315 | + | |
316 | + for (size_t i = 0; i<p; i++) { //sort gnome indices according to score values using bubble sort | |
317 | + for (size_t j = i + 1; j<p; j++) { | |
318 | + if (S[idx[i]] > S[idx[j]]) { | |
319 | + std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error | |
320 | + } | |
321 | + } | |
322 | + } | |
323 | + | |
324 | + //display best gnome | |
325 | + //std::cout << "best fitness value: " << S[idx[0]] << std::endl; | |
326 | + /*if (S[idx[0]] < 0) { | |
327 | + std::cout << "best gnome is " << std::endl; | |
328 | + for (size_t i = 0; i < f; i++) | |
329 | + std::cout << P[f * idx[0] + i] << ", "; | |
330 | + std::cout << std::endl; | |
331 | + }*/ | |
332 | + | |
333 | + return idx; //use as sortSIdx in selection | |
334 | + } | |
335 | + | |
336 | + | |
337 | + //size_t* sortIndx(float* input, size_t size) { | |
338 | + // //sort indices of score in ascending order (fitness value) | |
339 | + // size_t *idx; | |
340 | + // idx = (size_t*)malloc(size * sizeof(size_t)); | |
341 | + // for (size_t i = 0; i < size; i++) | |
342 | + // idx[i] = i; | |
343 | + | |
344 | + // for (size_t i = 0; i<size; i++) { | |
345 | + // for (size_t j = i + 1; j<size; j++) { | |
346 | + // if (input[idx[i]] < input[idx[j]]) { | |
347 | + // std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error | |
348 | + // } | |
349 | + // } | |
350 | + // } | |
351 | + // return idx; //use as sortSIdx in selection | |
352 | + | |
353 | + //} | |
354 | + | |
355 | + void generateNewP(unsigned int* newPop) { | |
356 | + //std::memcpy(P, 0 , p * f *sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s | |
357 | + std::memcpy(P, newPop, p * f * sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s | |
358 | + } | |
359 | + | |
360 | + //============================== functions for fitness function =========================================================================== | |
361 | + //compute total mean M (1 X B) of all features (tP X B) | |
362 | + void ttlMean(float* M, size_t tP, size_t B) { | |
363 | + //std::cout<<"total number of pixels are "<<tP<<std::endl; | |
364 | + for (int k = 0; k < tP; k++) { //total number of pixel in feature matrix | |
365 | + for (size_t n = 0; n < B; n++) { // index of feature in ith gnome | |
366 | + M[n] += F[k * B + n]; | |
367 | + } | |
368 | + } | |
369 | + for (size_t n = 0; n < B; n++) //take an avarage of above summation | |
370 | + M[n] = M[n] / (float)tP; | |
371 | + } | |
372 | + | |
373 | + void dispalymean(float* M) { //display mean | |
374 | + std::cout << std::endl; | |
375 | + std::cout << "Total mean of gnome 1 features are is " << std::endl; | |
376 | + | |
377 | + for (size_t i = 0; i < 1; i++) { | |
378 | + for (size_t j = 0; j < f; j++) { | |
379 | + size_t index = P[i*f + j]; | |
380 | + std::cout << "feature index " << index << "\t total mean" << M[index] << std::endl; | |
381 | + } | |
382 | + } | |
383 | + std::cout << std::endl; | |
384 | + } | |
385 | + | |
386 | + //Compute class means cM (p x nC x f) of all gnome features phe(tP x f) | |
387 | + void classMean(float* cM, size_t tP, size_t nC, size_t B, std::vector<unsigned int> nPxInCls) { | |
388 | + for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses | |
389 | + float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c' | |
390 | + for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix | |
391 | + if (T[k] == c + 1) { //class numbers start from 1 not 0 | |
392 | + for (size_t n = 0; n < B; n++) { //total number of features in a gnome | |
393 | + tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome | |
394 | + } | |
395 | + } | |
396 | + } | |
397 | + for (size_t n = 0; n < B; n++) | |
398 | + cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c' | |
399 | + | |
400 | + } | |
401 | + | |
402 | + } | |
403 | + | |
404 | + //display class mean | |
405 | + void dispalyClassmean(float* cM, size_t nC) { | |
406 | + std::cout << std::endl; | |
407 | + std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl; | |
408 | + for (size_t i = 0; i < 1; i++) { | |
409 | + for (size_t c = 0; c < nC; c++) { | |
410 | + for (size_t j = 0; j < f; j++) { | |
411 | + size_t index = P[i*f + j]; | |
412 | + | |
413 | + std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl; | |
414 | + } | |
415 | + } | |
416 | + } | |
417 | + std::cout << std::endl; | |
418 | + } | |
419 | + | |
420 | + //-----------------------------------------between and within class Scattering computation--------------------------------------------------------------- | |
421 | + //computation on CPU | |
422 | + void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector<unsigned int> nPxInCls) { | |
423 | + timer.start(); | |
424 | + computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU | |
425 | + const auto elapsed = timer.time_elapsed(); | |
426 | + std::cout << "Sb CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << "us" << std::endl; | |
427 | + | |
428 | + timer.start(); | |
429 | + computeSw(sw, cM, nC, tP); //compute within class scatter on CPU | |
430 | + const auto elapsed1 = timer.time_elapsed(); | |
431 | + std::cout << "Sw CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl; | |
432 | + } | |
433 | + | |
434 | + //display between class scatter | |
435 | + void displaySb(float* sb) { | |
436 | + std::cout << "between scatter is " << std::endl; | |
437 | + for (size_t g = 0; g<1; g++) { | |
438 | + std::cout << std::endl; | |
439 | + for (size_t j = 0; j < f; j++) { //total number of features in a gnome | |
440 | + for (size_t k = 0; k < f; k++) { //total number of features in a gnome | |
441 | + std::cout << sb[g * f * f + j * f + k] << " "; | |
442 | + } | |
443 | + std::cout << std::endl; | |
444 | + } | |
445 | + } | |
446 | + std::cout << std::endl; | |
447 | + } | |
448 | + | |
449 | + //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f) | |
450 | + void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector<unsigned int> nPxInCls) { | |
451 | + float tempsbval; | |
452 | + size_t n1; | |
453 | + size_t n2; | |
454 | + size_t classIndx; //class index in class mean matrix | |
455 | + /*std::cout <<"population of computation of cpusb "<< std::endl; | |
456 | + for (size_t i2 = 0; i2 < f; i2++) { | |
457 | + std::cout << P[i2] << "\t"; | |
458 | + }*/ | |
459 | + | |
460 | + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) { | |
461 | + for (size_t c = 0; c < nC; c++) { | |
462 | + for (size_t i = 0; i < f; i++) { | |
463 | + for (size_t j = 0; j < f; j++) { | |
464 | + tempsbval = 0; | |
465 | + classIndx = c * ub; | |
466 | + n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix | |
467 | + n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix | |
468 | + // std::cout << "i: " << i << " j: " <<j<< " n1: " << n1 << " n2:" << n2 << std::endl; | |
469 | + tempsbval = ((cM[classIndx + n1] - M[n1]) *(cM[classIndx + n2] - M[n2])); | |
470 | + sb[gnomeIndx * f * f + i * f + j] += tempsbval * (float)nPxInCls[c]; // compute tempsb[j][k] element of class 'c' of gnome 'i' | |
471 | + } | |
472 | + } | |
473 | + } | |
474 | + } | |
475 | + | |
476 | + } | |
477 | + | |
478 | + //Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f) | |
479 | + void computeSw(float* sw, float* cM, size_t nC, size_t tP) { | |
480 | + float tempswval; | |
481 | + size_t n1; | |
482 | + size_t n2; | |
483 | + size_t cMclass; //class index in class mean matrix | |
484 | + size_t Pg; | |
485 | + size_t swg; | |
486 | + size_t pheg; | |
487 | + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) { | |
488 | + Pg = gnomeIndx * f; | |
489 | + swg = gnomeIndx * f * f; | |
490 | + pheg = gnomeIndx * tP * f;; | |
491 | + for (size_t c = 0; c < nC; c++) { | |
492 | + cMclass = c * ub; | |
493 | + | |
494 | + for (size_t k = 0; k < tP; k++) { | |
495 | + if (T[k] == (c + 1)) { | |
496 | + for (size_t i = 0; i < f; i++) { | |
497 | + for (size_t j = 0; j < f; j++) { | |
498 | + n1 = P[Pg + i]; //actual feature index in original feature matrix | |
499 | + n2 = P[Pg + j]; //actual feature index in original feature matrix | |
500 | + | |
501 | + tempswval = 0; | |
502 | + tempswval = ((F[k * ub + n1] - cM[cMclass + n1]) * (F[k * ub + n2] - cM[cMclass + n2])); | |
503 | + //tempswval = ((phe[gnomeIndx * tP * f + k * f + i] - cM[c * ub + P[gnomeIndx * f + i]]) * (phe[gnomeIndx * tP *f + k * f + j] - cM[c * ub + P[gnomeIndx * f + j]])); | |
504 | + sw[gnomeIndx * f * f + i * f + j] += tempswval; | |
505 | + } | |
506 | + } | |
507 | + } | |
508 | + } | |
509 | + } | |
510 | + | |
511 | + } | |
512 | + } | |
513 | + //checking bands with all zeros and replacing duplicated bands in gnome but this function is only for initial population | |
514 | + //void zerobandcheck(float* M, bool initial) { | |
515 | + // for (size_t g = 0; g < p; g++) { // for each gnome | |
516 | + // for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome | |
517 | + // while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population | |
518 | + // P[g * f + i] = rand() % ub + lb; | |
519 | + // } | |
520 | + // } | |
521 | + // //checking for duplicats in a gnome | |
522 | + // std::vector<unsigned int> gnome = getGnome(g); | |
523 | + // std::vector<unsigned int> gnomeunique; | |
524 | + // int flag = 0; //flag will be set if gnome has duplicated band (feature) index | |
525 | + // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 | |
526 | + // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies | |
527 | + // if (gnomeunique.size()< gnome.size()) { | |
528 | + // flag = 1; //set flag for those if there are duplicated indices | |
529 | + // //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl; | |
530 | + // } | |
531 | + | |
532 | + // //adding extra random feature indices to unique copy of gnome to achive gnome size = f | |
533 | + // if (gnomeunique.size() < f) { | |
534 | + // for (size_t k = gnomeunique.size(); k < f; k++) { | |
535 | + // unsigned int rnumber = rand() % ub + lb; | |
536 | + // //check if this randomaly generated number is already present in that gnome or not | |
537 | + // for (size_t j = 0; j < gnomeunique.size(); j++) { | |
538 | + // if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number | |
539 | + // rnumber = rand() % ub + lb; | |
540 | + // j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome | |
541 | + // } | |
542 | + // } | |
543 | + // gnomeunique.push_back(rnumber); //add feature index to gnomeunique | |
544 | + // } | |
545 | + // } | |
546 | + // std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f); | |
547 | + // } | |
548 | + //} | |
549 | + | |
550 | + //checking bands with all zeros and replacing duplicated bands in gnome | |
551 | + void zerobandcheck(float* M, bool initialPop) { | |
552 | + size_t startgnome; | |
553 | + if (initialPop) { | |
554 | + startgnome = 0; //for initial population check all gnomes | |
555 | + } | |
556 | + else { | |
557 | + startgnome = 1; //for next generations start gnome check after elite children offset | |
558 | + } | |
559 | + for (size_t g = startgnome; g < p; g++) { // for each gnome except | |
560 | + | |
561 | + for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome | |
562 | + while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population | |
563 | + P[g * f + i] = rand() % ub + lb; | |
564 | + } | |
565 | + } | |
566 | + //checking for duplicats in a gnome | |
567 | + std::vector<unsigned int> gnome = getGnome(g); //get current gnome g from population matrix P | |
568 | + std::vector<unsigned int> gnomeunique; //array to store only unique band indicies in a genome | |
569 | + int flag = 0; //flag will be set if gnome has duplicated band (feature) index | |
570 | + std::sort(gnome.begin(), gnome.end()); //sort current gnome | |
571 | + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome | |
572 | + if (gnomeunique.size()< gnome.size()) { | |
573 | + flag = 1; //set flag for those if there are duplicated indices | |
574 | + //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl; | |
575 | + } | |
576 | + | |
577 | + //adding extra random feature indices to unique copy of gnome to achive gnome size = f | |
578 | + if (gnomeunique.size() < f) { | |
579 | + for (size_t k = gnomeunique.size(); k < f; k++) { | |
580 | + unsigned int rnumber = rand() % ub + lb; | |
581 | + //check if this randomaly generated number is already present in that gnome or not | |
582 | + for (size_t j = 0; j < gnomeunique.size(); j++) { | |
583 | + if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number | |
584 | + rnumber = rand() % ub + lb; //generate random number between upper bound and lower bound (ub. lb) | |
585 | + j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome | |
586 | + } | |
587 | + } | |
588 | + gnomeunique.push_back(rnumber); //add feature index to gnomeunique | |
589 | + } | |
590 | + } | |
591 | + | |
592 | + //diplay loop only if gnome has duplicated indices | |
593 | + //if(flag ==1){ | |
594 | + // std::cout<<"\n original gnome "<<g<<" are "<<std::endl; | |
595 | + // for(int k = 0; k < gnome.size(); k++) | |
596 | + // std::cout<<gnome[k]<<"\t"; | |
597 | + // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl; | |
598 | + // for(int k = 0; k < gnomeunique.size(); k++) | |
599 | + // std::cout<<gnomeunique[k]<<"\t"; | |
600 | + //} | |
601 | + std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f); //copy new gnome without any duplicate band index at current gnome location | |
602 | + } | |
603 | + } | |
604 | + | |
605 | + | |
606 | + | |
607 | + //gpu calling functions | |
608 | + //gpu initialization (allocating space for all array on GPU) | |
609 | + void gpuInitializationfrommain(float* cpuM, float* cpuCM, std::vector<unsigned int>cpu_nPxInCls, size_t tP, size_t nC) { | |
610 | + // call gpuInitialization(......) with all of the necessary parameters | |
611 | + gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]); | |
612 | + | |
613 | + } | |
614 | + | |
615 | + //Computation of between class scatter and within class scatter in GPU | |
616 | + void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) { | |
617 | + //calling function for SW and Sb computation and passing necessary arrays for computation | |
618 | + // std::cout<<"gpu function calling"<<std::endl; | |
619 | + gpucomputeSbSw(gpuP, P, p, f, gpuSb, cpuSb, gpuSw, cpuSw, gpuF, gpuT, gpuM, gpuCM, nC, tP, props, gen, gnrtn, ub, gpu_nPxInCls, profilefile); | |
620 | + | |
621 | + //display computed Sb and Sw if debug is set | |
622 | + if (debug) { | |
623 | + std::cout << "From GA-GPU class: gpu results of Sb sn Sw" << std::endl; | |
624 | + displayS(cpuSb, f); //display Sb | |
625 | + displayS(cpuSw, f); //display Sw | |
626 | + std::cout << std::endl; | |
627 | + } | |
628 | + } | |
629 | + | |
630 | + //call function to free gpu pointers | |
631 | + //free all gpu pointers | |
632 | + void gpu_Destroy() { | |
633 | + gpuDestroy(gpuP, gpuCM, gpuM, gpu_nPxInCls, gpuSb, gpuSw, gpuF, gpuT); | |
634 | + } | |
635 | + | |
636 | + //Write a destructor here | |
637 | + ~ga_gpu() { | |
638 | + | |
639 | + if (F != NULL) std::free(F); //not sure about this as it is only for 2nd constructor | |
640 | + if (T != NULL) std::free(T); //same as above | |
641 | + if (P != NULL) std::free(P); //not sure about this as it is only for 2nd constructor | |
642 | + if (S != NULL) std::free(S); //same as above | |
643 | + //if(i_guess!=NULL) std::free(i_guess); //same as above | |
644 | + //HANDLE_ERROR(cudaDeviceReset()); | |
645 | + | |
646 | + } | |
647 | + }; | |
648 | + | |
649 | +#endif | ... | ... |
1 | +++ a/src/main.cpp | |
1 | +#include <iostream> | |
2 | + | |
3 | +//stim libraries | |
4 | +#include <stim/envi/envi.h> | |
5 | +#include <stim/image/image.h> | |
6 | +#include <stim/ui/progressbar.h> | |
7 | +#include <stim/parser/filename.h> | |
8 | +#include <stim/parser/table.h> | |
9 | +#include <stim/parser/arguments.h> | |
10 | +//input arguments | |
11 | +stim::arglist args; | |
12 | +#include <fstream> | |
13 | +#include <thread> | |
14 | +#include <random> | |
15 | +#include <vector> | |
16 | +#include <math.h> | |
17 | +#include <limits> | |
18 | + | |
19 | +#define NOMINMAX | |
20 | + | |
21 | + | |
22 | + | |
23 | +//GA | |
24 | +#include "ga_gpu.h" | |
25 | +#include "enviload.h" | |
26 | + | |
27 | + | |
28 | +//envi input file and associated parameters | |
29 | +stim::envi E; //ENVI binary file object | |
30 | +unsigned int B; //shortcuts storing the spatial and spectral size of the ENVI image | |
31 | +//mask and class information used for training | |
32 | +//std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel# | |
33 | +std::vector<unsigned int> nP; //array holds the number of pixels in each mask: nP[m] is the number of pixels in mask m | |
34 | +size_t nC = 0; //number of classes | |
35 | +size_t tP = 0; //total number of pixels in all masks: tP = nP[0] + nP[1] + ... + nP[nC] | |
36 | +float* fea; | |
37 | + | |
38 | +//ga_gpu class object | |
39 | +ga_gpu ga; | |
40 | +bool debug; | |
41 | +bool binaryClass; | |
42 | +int binClassOne; | |
43 | + | |
//Bundle of raw float buffers handed to the thread functions as one object, because the
//thread call limits the number of arguments to 3.
struct gnome {
	float* S;
	float* Sb;		//read per gnome as Sb[g * f * f]
	float* Sw;		//read per gnome as Sw[g * f * f]
	float* lda;		//written per gnome starting at lda[g * r * f]
};

//single shared instance used by the thread functions
gnome gnom;
52 | + | |
53 | + | |
54 | +void gpuComputeEignS( size_t g, size_t fea){ | |
55 | + //eigen value computation will return r = (nC-1) eigen vectors so new projected data will have dimension of r rather than f | |
56 | + // std::thread::id this_id = std::this_thread::get_id(); | |
57 | + // std::cout<<"thread id is "<< this_id<<std::endl; | |
58 | + size_t f = fea; | |
59 | + //std::thread::id g = std::this_thread::get_id(); | |
60 | + float* LeftEigVectors_a = (float*) malloc(f * f * sizeof(float)); | |
61 | + float* gSw_a = (float*) malloc(f * f * sizeof(float)); //copy of between class scatter | |
62 | + std::memcpy(gSw_a, &gnom.Sw[g * f * f], f * f *sizeof(float)); | |
63 | + if(debug){ | |
64 | + std::cout<<"From Eigen function: Sb and Sw "<<std::endl; | |
65 | + displayS(gSw_a, f); //display Sb | |
66 | + displayS(&gnom.Sb[g * f * f], f); //display Sw | |
67 | + std::cout<<std::endl; | |
68 | + } | |
69 | + | |
70 | + std::vector<unsigned int> features = ga.getGnome(g); | |
71 | + std::vector<unsigned int> featuresunique; | |
72 | + int flag = 0; | |
73 | + std::sort(features.begin(), features.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 | |
74 | + std::unique_copy(features.begin(), features.end(), std::back_inserter(featuresunique)); | |
75 | + if(featuresunique.size()< features.size()){ | |
76 | + f = featuresunique.size(); | |
77 | + } | |
78 | + | |
79 | + size_t r = nC-1; //LDA projected dimension (limited to number of classes - 1 by rank) | |
80 | + if(r > f){ | |
81 | + r = f; | |
82 | + } | |
83 | + | |
84 | + int info; | |
85 | + float* EigenvaluesI_a = (float*)malloc(f * sizeof(float)); | |
86 | + float* Eigenvalues_a = (float*)malloc(f * sizeof(float)); | |
87 | + int *IPIV = (int*) malloc(sizeof(int) * f); | |
88 | + //computing inverse of matrix Sw | |
89 | + memset(IPIV, 0, f * sizeof(int)); | |
90 | + LAPACKE_sgetrf(LAPACK_COL_MAJOR, (int)f, (int)f, gSw_a, (int)f, IPIV); | |
91 | + // DGETRI computes the inverse of a matrix using the LU factorization computed by DGETRF. | |
92 | + LAPACKE_sgetri(LAPACK_COL_MAJOR, (int)f, gSw_a, (int)f, IPIV); | |
93 | + | |
94 | + float* gSbSw_a = (float*)calloc(f * f, sizeof(float)); | |
95 | + //mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f * sizeof(float)], f, f, f,f); | |
96 | + mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f], f, f, f,f); | |
97 | + if(debug){ | |
98 | + std::cout<<"From Eigen function: inverse of sw and ratio of sb and sw (Sb/Sw)"; | |
99 | + displayS(gSw_a, f); //display inverse of Sw (1/Sw) | |
100 | + displayS(gSbSw_a, f); //display ratio of Sb and Sw (Sb/Sw) | |
101 | + } | |
102 | + | |
103 | + //compute left eigenvectors for current gnome from ratio of between class scatter and within class scatter: Sb/Sw | |
104 | + info = LAPACKE_sgeev(LAPACK_COL_MAJOR, 'V', 'N', (int)f, gSbSw_a, (int)f, Eigenvalues_a, EigenvaluesI_a, LeftEigVectors_a, (int)f, 0, (int)f); | |
105 | + //sort eignevalue indices in descending order | |
106 | + size_t* sortedindx = sortIndx(Eigenvalues_a, f); | |
107 | + //displayS(LeftEigVectors_a, f); //display Eignevectors (Note these are -1 * matlab eigenvectors does not change fitness score results but keep in mind while projecting data on it) | |
108 | + //sorting left eigenvectors (building forward transformation matrix As) | |
109 | + for (size_t rowE = 0; rowE < r; rowE++){ | |
110 | + for (size_t colE = 0; colE < f; colE++){ | |
111 | + size_t ind1 = g * r * f + rowE * f + colE; | |
112 | + //size_t ind1 = rowE * f + colE; | |
113 | + size_t ind2 = sortedindx[rowE] * f + colE; //eigenvector as row vector | |
114 | + gnom.lda[ind1] = LeftEigVectors_a[ind2]; | |
115 | + } | |
116 | + } | |
117 | + | |
118 | + if(debug){ | |
119 | + std::cout<<"Eigenvalues are"<<std::endl; | |
120 | + for(size_t n = 0 ; n < f; n ++){ | |
121 | + std::cout << Eigenvalues_a[n] << ", " ; | |
122 | + } | |
123 | + std::cout<< std::endl; | |
124 | + std::cout<<"From Eigen function: Eignevector"<<std::endl; | |
125 | + | |
126 | + std::cout<<"LDA basis is "<<std::endl; | |
127 | + std::cout << "r is " << r << std::endl; | |
128 | + for(size_t l = 0 ; l < r; l++){ | |
129 | + for(size_t n = 0 ; n < f; n ++){ | |
130 | + std::cout << gnom.lda[g * l * f + l * f + n] << ", " ; | |
131 | + } | |
132 | + std::cout<<std::endl; | |
133 | + } | |
134 | + | |
135 | + } | |
136 | + //Extract only r eigne vectors as a LDA projection basis | |
137 | + float* tempgSb = (float*)calloc(r * f, sizeof(float)); | |
138 | + //mtxMul(tempgSb, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sb[g * f * f * sizeof(float)], r, f, f,f); | |
139 | + //mtxMul(tempgSb, &lda[g * r * f ], gSb, r, f, f,f); | |
140 | + mtxMul(tempgSb, &gnom.lda[g * r * f], &gnom.Sb[g * f * f], r, f, f,f); | |
141 | + float* nSb = (float*)calloc(r * r, sizeof(float)); | |
142 | + mtxMultranspose(nSb, tempgSb, &gnom.lda[g * r * f], r, f, r, f); | |
143 | + | |
144 | + float* tempgSw = (float*)calloc(r * f, sizeof(float)); | |
145 | + //mtxMul(tempgSw, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sw[g * f * f * sizeof(float)], r, f, f,f); | |
146 | + mtxMul(tempgSw, &gnom.lda[g * r * f], &gnom.Sw[g * f * f], r, f, f,f); | |
147 | + float* nSw = (float*)calloc(r * r, sizeof(float)); | |
148 | + mtxMultranspose(nSw, tempgSw, &gnom.lda[g * r * f], r, f, r, f); | |
149 | + if(debug){ | |
150 | + std::cout<<"From Eigen function: projected Sb sn Sw"<<std::endl; | |