release / genetic-gpu

Browse Code »

Commit 0d84034253937b68843ec545a0050dd6abaccf5c

Authored by David Mayerich 2019-12-16 13:50:34 -0600

0 parents

public release commit

Showing 11 changed files with 2164 additions and 0 deletions Show diff stats

Inline Side-by-side

CMakeLists.txt 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/CMakeLists.txt
	1	+#Specify the version being used aswell as the language
	2	+cmake_minimum_required(VERSION 2.8)
	3	+
	4	+#Name your project here
	5	+project(ga-gpu)
	6	+
	7	+#set the module directory
	8	+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
	9	+
	10	+#default to release mode
	11	+if(NOT CMAKE_BUILD_TYPE)
	12	+ set(CMAKE_BUILD_TYPE Release)
	13	+endif(NOT CMAKE_BUILD_TYPE)
	14	+
	15	+#build the executable in the binary directory on MS Visual Studio
	16	+if ( MSVC )
	17	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
	18	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
	19	+endif ( MSVC )
	20	+#MAYBE REMOVE-----------------
	21	+#set C++11 flags if using GCC
	22	+if( CMAKE_COMPILER_IS_GNUCC )
	23	+# SET( CMAKE_CXX_FLAGS "-std=c++11")
	24	+ set(CMAKE_CXX_FLAGS "-std=c++11 -D_FORCE_INLINES")
	25	+# SET( CUDA_NVCC_FLAGS "-std=c++11")
	26	+endif( CMAKE_COMPILER_IS_GNUCC )
	27	+
	28	+SET( CUDA_NVCC_FLAGS "--gpu-architecture=compute_50 --gpu-code=sm_50,compute_50")
	29	+#-----------------------------
	30	+
	31	+
	32	+
	33	+#find packages-----------------------------------
	34	+#find OpenCV
	35	+find_package(OpenCV REQUIRED)
	36	+add_definitions(-DUSING_OPENCV)
	37	+
	38	+#find the pthreads package
	39	+find_package(Threads)
	40	+
	41	+#find the X11 package
	42	+find_package(X11)
	43	+
	44	+#find the STIM library
	45	+find_package(STIM)
	46	+
	47	+#find CUDA, mostly for LA stuff using cuBLAS
	48	+find_package(CUDA REQUIRED)
	49	+
	50	+#find Boost for Unix-based file lists
	51	+if( CMAKE_COMPILER_IS_GNUCC )
	52	+ find_package(Boost COMPONENTS filesystem system)
	53	+ if(Boost_FOUND)
	54	+ include_directories(${Boost_INCLUDE_DIR})
	55	+ else()
	56	+ message(FATAL_ERROR "HSIproc requires Boost::filesystem and Boost::system when using GCC")
	57	+ endif()
	58	+endif()
	59	+
	60	+#find FANN
	61	+#find_package(FANN REQUIRED)
	62	+
	63	+#find the GLUT library for visualization
	64	+#find_package(OpenGL REQUIRED)
	65	+#find_package(GLUT REQUIRED)
	66	+#if(WIN32)
	67	+# find_package(GLEW REQUIRED)
	68	+# include_directories(${GLEW_INCLUDE_DIR})
	69	+#endif(WIN32)
	70	+
	71	+#find LAPACK and supporting link_libraries
	72	+find_package(LAPACKE REQUIRED)
	73	+
	74	+#include include directories
	75	+include_directories(${CUDA_INCLUDE_DIRS}
	76	+ ${OpenCV_INCLUDE_DIRS}
	77	+ ${LAPACKE_INCLUDE_DIR}
	78	+ ${STIM_INCLUDE_DIRS}
	79	+ ${OpenGL_INCLUDE_DIRS}
	80	+# ${GLUT_INCLUDE_DIR}
	81	+ ${FANN_INCLUDE_DIRS}
	82	+ "${CMAKE_SOURCE_DIR}/src"
	83	+)
	84	+
	85	+#Assign a variable for all of the header files in this project
	86	+include_directories("${CMAKE_SOURCE_DIR}/src")
	87	+#file(GLOB GACPU_H "${CMAKE_SOURCE_DIR}/src/gacpu/*.h")
	88	+file(GLOB GAGPU_H "${CMAKE_SOURCE_DIR}/src/*.h")
	89	+#file(GLOB GA_H "${CMAKE_SOURCE_DIR}/src/*.h")
	90	+
	91	+#Assign source files to the appropriate variables to easily associate them with executables
	92	+#file(GLOB GA_CPU_SRC "${CMAKE_SOURCE_DIR}/src/gacpu/*.cpp")
	93	+file(GLOB GA_GPU_SRC "${CMAKE_SOURCE_DIR}/src/.c")
	94	+
	95	+
	96	+#create an executable file
	97	+cuda_add_executable(ga-gpu
	98	+ ${GAGPU_H}
	99	+# ${GA_H}
	100	+ ${GA_GPU_SRC}
	101	+)
	102	+target_link_libraries(ga-gpu ${CUDA_LIBRARIES}
	103	+ ${CUDA_CUBLAS_LIBRARIES}
	104	+ ${CUDA_CUFFT_LIBRARIES}
	105	+ ${LAPACKE_LIBRARIES}
	106	+ ${LAPACK_LIBRARIES}
	107	+ ${BLAS_LIBRARIES}
	108	+ ${CMAKE_THREAD_LIBS_INIT}
	109	+ ${X11_LIBRARIES}
	110	+ ${OpenCV_LIBS}
	111	+)
	112	+
	113	+
	114	+#create the PROC executable----------------------------------------------
	115	+
	116	+#create an executable file
	117	+#add_executable(hsiga
	118	+# ${GACPU_H}
	119	+# ${GA_H}
	120	+# ${GA_CPU_SRC}
	121	+#)
	122	+#target_link_libraries(hsiga ${LAPACKE_LIBRARIES}
	123	+# ${LAPACK_LIBRARIES}
	124	+# ${BLAS_LIBRARIES}
	125	+# ${CMAKE_THREAD_LIBS_INIT}
	126	+# ${X11_LIBRARIES}
	127	+# ${OpenCV_LIBS}
	128	+#)
	129	+
	130	+
	131	+
	132	+#if Boost is found, set an environment variable to use with preprocessor directives
	133	+if(Boost_FILESYSTEM_FOUND)
	134	+# if(BUILD_GACPU)
	135	+# target_link_libraries(hsiga ${Boost_FILESYSTEM_LIBRARIES}
	136	+# ${Boost_SYSTEM_LIBRARY}
	137	+# )
	138	+ #message(${Boost_FILESYSTEM_LIBRARIES})
	139	+# endif(BUILD_GACPU)
	140	+# if(BUILD_GAGPU)
	141	+ target_link_libraries(ga-gpu ${Boost_FILESYSTEM_LIBRARIES}
	142	+ ${Boost_SYSTEM_LIBRARY}
	143	+ )
	144	+# endif(BUILD_GAGPU)
	145	+endif(Boost_FILESYSTEM_FOUND)
...	...

FindGLEW.cmake 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/FindGLEW.cmake
	1	+# Copyright (c) 2012-2016 DreamWorks Animation LLC
	2	+#
	3	+# All rights reserved. This software is distributed under the
	4	+# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
	5	+#
	6	+# Redistributions of source code must retain the above copyright
	7	+# and license notice and the following restrictions and disclaimer.
	8	+#
	9	+# * Neither the name of DreamWorks Animation nor the names of
	10	+# its contributors may be used to endorse or promote products derived
	11	+# from this software without specific prior written permission.
	12	+#
	13	+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	14	+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	15	+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	16	+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	17	+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL,
	18	+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	19	+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	20	+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	21	+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	22	+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	23	+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	24	+# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE
	25	+# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00.
	26	+#
	27	+
	28	+#--cmake--
	29	+# - Find GLEW
	30	+#
	31	+# Author : Nicholas Yue yue.nicholas@gmail.com
	32	+#
	33	+# This auxiliary CMake file helps in find the GLEW headers and libraries
	34	+#
	35	+# GLEW_FOUND set if Glew is found.
	36	+# GLEW_INCLUDE_DIR GLEW's include directory
	37	+# GLEW_glew_LIBRARY GLEW libraries
	38	+# GLEW_glewmx_LIBRARY GLEWmx libraries (Mulitple Rendering Context)
	39	+
	40	+FIND_PACKAGE ( PackageHandleStandardArgs )
	41	+
	42	+FIND_PATH( GLEW_LOCATION include/GL/glew.h
	43	+ "$ENV{GLEW_ROOT}"
	44	+ NO_DEFAULT_PATH
	45	+ NO_SYSTEM_ENVIRONMENT_PATH
	46	+ )
	47	+
	48	+FIND_PACKAGE_HANDLE_STANDARD_ARGS ( GLEW
	49	+ REQUIRED_VARS GLEW_LOCATION
	50	+ )
	51	+
	52	+IF ( GLEW_LOCATION )
	53	+
	54	+ SET( GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path")
	55	+
	56	+ SET ( ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
	57	+ IF (GLEW_USE_STATIC_LIBS)
	58	+ IF (APPLE)
	59	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
	60	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
	61	+ NO_DEFAULT_PATH
	62	+ NO_SYSTEM_ENVIRONMENT_PATH
	63	+ )
	64	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
	65	+ NO_DEFAULT_PATH
	66	+ NO_SYSTEM_ENVIRONMENT_PATH
	67	+ )
	68	+ # MESSAGE ( "APPLE STATIC" )
	69	+ # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
	70	+ ELSEIF (WIN32)
	71	+ # Link library
	72	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
	73	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib )
	74	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib )
	75	+ ELSE (APPLE)
	76	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
	77	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
	78	+ NO_DEFAULT_PATH
	79	+ NO_SYSTEM_ENVIRONMENT_PATH
	80	+ )
	81	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
	82	+ NO_DEFAULT_PATH
	83	+ NO_SYSTEM_ENVIRONMENT_PATH
	84	+ )
	85	+ # MESSAGE ( "LINUX STATIC" )
	86	+ # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
	87	+ ENDIF (APPLE)
	88	+ ELSE ()
	89	+ IF (APPLE)
	90	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
	91	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib )
	92	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib )
	93	+ ELSEIF (WIN32)
	94	+ # Link library
	95	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
	96	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib )
	97	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib )
	98	+ # Load library
	99	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
	100	+ FIND_LIBRARY ( GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin
	101	+ NO_DEFAULT_PATH
	102	+ NO_SYSTEM_ENVIRONMENT_PATH
	103	+ )
	104	+ FIND_LIBRARY ( GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin
	105	+ NO_DEFAULT_PATH
	106	+ NO_SYSTEM_ENVIRONMENT_PATH
	107	+ )
	108	+ ELSE (APPLE)
	109	+ # Unices
	110	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
	111	+ NO_DEFAULT_PATH
	112	+ NO_SYSTEM_ENVIRONMENT_PATH
	113	+ )
	114	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
	115	+ NO_DEFAULT_PATH
	116	+ NO_SYSTEM_ENVIRONMENT_PATH
	117	+ )
	118	+ ENDIF (APPLE)
	119	+ ENDIF ()
	120	+ # MUST reset
	121	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})
	122	+
	123	+ SET( GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library")
	124	+ SET( GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library")
	125	+
	126	+ENDIF ()
...	...

FindGLUT.cmake 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/FindGLUT.cmake
	1	+#.rst:
	2	+# FindGLUT
	3	+# --------
	4	+#
	5	+# try to find glut library and include files.
	6	+#
	7	+# IMPORTED Targets
	8	+# ^^^^^^^^^^^^^^^^
	9	+#
	10	+# This module defines the :prop_tgt:`IMPORTED` targets:
	11	+#
	12	+# ``GLUT::GLUT``
	13	+# Defined if the system has GLUT.
	14	+#
	15	+# Result Variables
	16	+# ^^^^^^^^^^^^^^^^
	17	+#
	18	+# This module sets the following variables:
	19	+#
	20	+# ::
	21	+#
	22	+# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc.
	23	+# GLUT_LIBRARIES, the libraries to link against
	24	+# GLUT_FOUND, If false, do not try to use GLUT.
	25	+#
	26	+# Also defined, but not for general use are:
	27	+#
	28	+# ::
	29	+#
	30	+# GLUT_glut_LIBRARY = the full path to the glut library.
	31	+# GLUT_Xmu_LIBRARY = the full path to the Xmu library.
	32	+# GLUT_Xi_LIBRARY = the full path to the Xi Library.
	33	+
	34	+#=============================================================================
	35	+# Copyright 2001-2009 Kitware, Inc.
	36	+#
	37	+# Distributed under the OSI-approved BSD License (the "License");
	38	+# see accompanying file Copyright.txt for details.
	39	+#
	40	+# This software is distributed WITHOUT ANY WARRANTY; without even the
	41	+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
	42	+# See the License for more information.
	43	+#=============================================================================
	44	+# (To distribute this file outside of CMake, substitute the full
	45	+# License text for the above reference.)
	46	+
	47	+if (WIN32)
	48	+ find_path( GLUT_INCLUDE_DIR NAMES GL/glut.h
	49	+ PATHS $ENV{GLUT_ROOT_PATH}/include )
	50	+
	51	+ if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
	52	+ find_library( GLUT_glut_LIBRARY NAMES freeglut
	53	+ PATHS
	54	+ $ENV{GLUT_ROOT_PATH}/lib/x64
	55	+
	56	+ NO_DEFAULT_PATH
	57	+ )
	58	+ else( CMAKE_SIZEOF_VOID_P EQUAL 8 )
	59	+ find_library( GLUT_glut_LIBRARY NAMES glut glut32 freeglut
	60	+ PATHS
	61	+ ${OPENGL_LIBRARY_DIR}
	62	+ $ENV{GLUT_ROOT_PATH}/lib
	63	+ )
	64	+ endif( CMAKE_SIZEOF_VOID_P EQUAL 8 )
	65	+
	66	+else ()
	67	+
	68	+ if (APPLE)
	69	+ find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
	70	+ find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
	71	+ find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")
	72	+
	73	+ if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
	74	+ add_library(GLUT::Cocoa UNKNOWN IMPORTED)
	75	+ # Cocoa should always be a Framework, but we check to make sure.
	76	+ if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
	77	+ set_target_properties(GLUT::Cocoa PROPERTIES
	78	+ IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
	79	+ else()
	80	+ set_target_properties(GLUT::Cocoa PROPERTIES
	81	+ IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}")
	82	+ endif()
	83	+ endif()
	84	+ else ()
	85	+
	86	+ if (BEOS)
	87	+
	88	+ set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
	89	+ set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
	90	+
	91	+ else()
	92	+
	93	+ find_library( GLUT_Xi_LIBRARY Xi
	94	+ /usr/openwin/lib
	95	+ )
	96	+
	97	+ find_library( GLUT_Xmu_LIBRARY Xmu
	98	+ /usr/openwin/lib
	99	+ )
	100	+
	101	+ if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi)
	102	+ add_library(GLUT::Xi UNKNOWN IMPORTED)
	103	+ set_target_properties(GLUT::Xi PROPERTIES
	104	+ IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}")
	105	+ endif()
	106	+
	107	+ if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu)
	108	+ add_library(GLUT::Xmu UNKNOWN IMPORTED)
	109	+ set_target_properties(GLUT::Xmu PROPERTIES
	110	+ IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}")
	111	+ endif()
	112	+
	113	+ endif ()
	114	+
	115	+ find_path( GLUT_INCLUDE_DIR GL/glut.h
	116	+ /usr/include/GL
	117	+ /usr/openwin/share/include
	118	+ /usr/openwin/include
	119	+ /opt/graphics/OpenGL/include
	120	+ /opt/graphics/OpenGL/contrib/libglut
	121	+ ${_GLUT_INC_DIR}
	122	+ )
	123	+
	124	+ find_library( GLUT_glut_LIBRARY glut
	125	+ /usr/openwin/lib
	126	+ ${_GLUT_glut_LIB_DIR}
	127	+ )
	128	+
	129	+ unset(_GLUT_INC_DIR)
	130	+ unset(_GLUT_glut_LIB_DIR)
	131	+
	132	+ endif ()
	133	+
	134	+endif ()
	135	+
	136	+FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)
	137	+
	138	+if (GLUT_FOUND)
	139	+ # Is -lXi and -lXmu required on all platforms that have it?
	140	+ # If not, we need some way to figure out what platform we are on.
	141	+ set( GLUT_LIBRARIES
	142	+ ${GLUT_glut_LIBRARY}
	143	+ ${GLUT_Xmu_LIBRARY}
	144	+ ${GLUT_Xi_LIBRARY}
	145	+ ${GLUT_cocoa_LIBRARY}
	146	+ )
	147	+
	148	+ if(NOT TARGET GLUT::GLUT)
	149	+ add_library(GLUT::GLUT UNKNOWN IMPORTED)
	150	+ set_target_properties(GLUT::GLUT PROPERTIES
	151	+ INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")
	152	+ if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$")
	153	+ set_target_properties(GLUT::GLUT PROPERTIES
	154	+ IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
	155	+ else()
	156	+ set_target_properties(GLUT::GLUT PROPERTIES
	157	+ IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
	158	+ endif()
	159	+
	160	+ if(TARGET GLUT::Xmu)
	161	+ set_property(TARGET GLUT::GLUT APPEND
	162	+ PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu)
	163	+ endif()
	164	+
	165	+ if(TARGET GLUT::Xi)
	166	+ set_property(TARGET GLUT::GLUT APPEND
	167	+ PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi)
	168	+ endif()
	169	+
	170	+ if(TARGET GLUT::Cocoa)
	171	+ set_property(TARGET GLUT::GLUT APPEND
	172	+ PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa)
	173	+ endif()
	174	+ endif()
	175	+
	176	+ #The following deprecated settings are for backwards compatibility with CMake1.4
	177	+ set (GLUT_LIBRARY ${GLUT_LIBRARIES})
	178	+ set (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
	179	+endif()
	180	+
	181	+mark_as_advanced(
	182	+ GLUT_INCLUDE_DIR
	183	+ GLUT_glut_LIBRARY
	184	+ GLUT_Xmu_LIBRARY
	185	+ GLUT_Xi_LIBRARY
	186	+ )
...	...

FindLAPACKE.cmake 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/FindLAPACKE.cmake
	1	+# - Try to find LAPACKE
	2	+#
	3	+# Once done this will define
	4	+# LAPACKE_FOUND - System has LAPACKE
	5	+# LAPACKE_INCLUDE_DIRS - The LAPACKE include directories
	6	+# LAPACKE_LIBRARIES - The libraries needed to use LAPACKE
	7	+# LAPACKE_DEFINITIONS - Compiler switches required for using LAPACKE
	8	+#
	9	+# Usually, LAPACKE requires LAPACK and the BLAS. This module does
	10	+# not enforce anything about that.
	11	+
	12	+find_path(LAPACKE_INCLUDE_DIR
	13	+ NAMES lapacke.h
	14	+ PATHS $ENV{LAPACK_PATH} ${INCLUDE_INSTALL_DIR}
	15	+ PATHS ENV INCLUDE)
	16	+
	17	+find_library(LAPACKE_LIBRARY liblapacke lapacke
	18	+ PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
	19	+ PATHS ENV LIBRARY_PATH
	20	+ PATHS ENV LD_LIBRARY_PATH)
	21	+
	22	+if(MSVC)
	23	+ find_library(LAPACK_LIBRARY liblapack lapack
	24	+ PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
	25	+ PATHS ENV LIBRARY_PATH
	26	+ PATHS ENV LD_LIBRARY_PATH)
	27	+
	28	+ find_library(BLAS_LIBRARY libblas blas
	29	+ PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
	30	+ PATHS ENV LIBRARY_PATH
	31	+ PATHS ENV LD_LIBRARY_PATH)
	32	+
	33	+else()
	34	+ find_library(LAPACK REQUIRED)
	35	+ find_library(BLAS REQUIRED)
	36	+endif()
	37	+set(LAPACKE_LIBRARIES ${LAPACKE_LIBRARY} ${LAPACK_LIBRARY} ${BLAS_LIBRARY})
	38	+
	39	+include(FindPackageHandleStandardArgs)
	40	+find_package_handle_standard_args(LAPACKE DEFAULT_MSG
	41	+ LAPACKE_INCLUDE_DIR
	42	+ LAPACKE_LIBRARIES)
	43	+mark_as_advanced(LAPACKE_INCLUDE_DIR LAPACKE_LIBRARIES)
...	...

FindSTIM.cmake 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/FindSTIM.cmake
	1	+# finds the STIM library (downloads it if it isn't present)
	2	+# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
	3	+
	4	+include(FindPackageHandleStandardArgs)
	5	+
	6	+set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
	7	+
	8	+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
	9	+
	10	+if(STIM_FOUND)
	11	+ set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
	12	+elseif(STIM_FOUND)
	13	+ #if the STIM library isn't found, download it
	14	+ #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists
	15	+ #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
	16	+ #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
	17	+ #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
	18	+ message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
	19	+ message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
	20	+endif(STIM_FOUND)
	21	+
	22	+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
...	...

src/basic_functions.h 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/src/basic_functions.h
	1	+#include <stdio.h>
	2	+
	3	+
	4	+size_t* sortIndx(float* input, size_t size){
	5	+ //sort indices of score in ascending order (fitness value)
	6	+ size_t *idx;
	7	+ idx = (size_t) malloc (size sizeof (size_t));
	8	+ for (size_t i = 0; i < size; i++)
	9	+ idx[i] = i;
	10	+
	11	+ for (size_t i=0; i<size; i++){
	12	+ for (size_t j=i+1; j<size; j++){
	13	+ if (input[idx[i]] < input[idx[j]]){
	14	+ std::swap (idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
	15	+ }
	16	+ }
	17	+ }
	18	+ return idx; //use as sortSIdx in selection
	19	+}
	20	+
	21	+
	22	+template<typename T>
	23	+void mtxMul(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
	24	+ //compute output matrix M3 of size row1 X column2 and data is column major
	25	+ for(size_t i = 0 ; i <r1; i++){
	26	+ for(size_t j = 0; j< c2; j++){
	27	+ T temp = 0;
	28	+ for(size_t k = 0; k < c1 ; k++){ //column1 = row2 for matrix multiplication
	29	+ temp+= M1[i * c1 + k] * M2[k * c2 + j]; //compute an element of output matrix
	30	+ }
	31	+ M3[i * c1 + j] = temp; //copy an element to output matrix
	32	+ }
	33	+ }
	34	+}
	35	+
	36	+template<typename T>
	37	+void mtxMultranspose(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
	38	+ //compute output matrix M3 of size row1 X column2 and data is column major
	39	+ for(size_t i = 0 ; i <r1; i++){
	40	+ for(size_t j = 0; j< r2; j++){
	41	+ T temp = 0;
	42	+ for(size_t k = 0; k < c1 ; k++){ //column1 = row2 for matrix multiplication
	43	+ temp+= M1[i * c1 + k] * M2[j * c2 + k]; //compute an element of output matrix
	44	+ }
	45	+ M3[i * r1 + j] = temp; //copy an element to output matrix
	46	+ }
	47	+ }
	48	+}
	49	+
	50	+ //display within class scatter
	51	+template<typename T>
	52	+void displayS(T* sw, size_t f){
	53	+
	54	+ for(size_t g = 0; g<1; g++){
	55	+ std::cout<<std::endl;
	56	+ for(size_t j = 0; j < f; j++){ //total number of features in a gnome
	57	+ for(size_t k = 0; k < f; k++){ //total number of features in a gnome
	58	+ std::cout<<sw[gff + j*f + k]<<" ";
	59	+ }
	60	+ std::cout<<std::endl;
	61	+ }
	62	+ }
	63	+ std::cout<<std::endl;
	64	+}
	65	+
	66	+//sort eigenvalues from lapacke results
	67	+size_t* sortEigenVectorIndx(float* eigenvalue, size_t N){
	68	+ //sort indices of score in ascending order (fitness value)
	69	+ size_t idx = (size_t) malloc (N * sizeof (size_t));
	70	+ for (size_t i = 0; i < N; i++)
	71	+ idx[i] = i;
	72	+
	73	+ for (size_t i=0; i<N; i++){
	74	+ for (size_t j=i+1; j<N; j++){
	75	+ if (eigenvalue[idx[i]] > eigenvalue[idx[j]]){
	76	+ std::swap (idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
	77	+ }
	78	+ }
	79	+ }
	80	+
	81	+ std::cout<<"best eigenvalue index: "<<eigenvalue[idx[0]]<<std::endl;
	82	+
	83	+ return idx; //use as sortSIdx in selection
	84	+
	85	+}
...	...

src/enviload.h 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/src/enviload.h
	1	+#include <iostream>
	2	+#include <fstream>
	3	+#include <thread>
	4	+#include <random>
	5	+#include <vector>
	6	+//#include <algorithm>
	7	+
	8	+#define NOMINMAX
	9	+
	10	+//stim libraries
	11	+#include <stim/envi/envi.h>
	12	+#include <stim/image/image.h>
	13	+#include <stim/parser/arguments.h>
	14	+#include <stim/ui/progressbar.h>
	15	+#include <stim/parser/filename.h>
	16	+//#include <stim/visualization/colormap.h>
	17	+#include <stim/parser/table.h>
	18	+
	19	+std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
	20	+//loads spectral features into a feature matrix based on a set of class images (or masks)
	21	+float* load_features(size_t nC, size_t tP, size_t B, stim::envi E, std::vector< unsigned int > nP){
	22	+ float progress = 0; //initialize the progress bar variable
	23	+ unsigned long long bytes_fmat = sizeof(float) * tP * B; //calculate the number of bytes in the feature matrix
	24	+ std::cout<<"totalnumber of samples "<<tP<<std::endl;
	25	+ std::cout<<"Allocating space for the feature matrix: "<<tP<<" x "<<B<<" = "<<(float)bytes_fmat/(float)1048576<<"MB"<<std::endl;
	26	+ float* F = (float*) malloc(bytes_fmat); //allocate space for the sifted matrix
	27	+ std::cout<<"Loading Training Data ("<<nC<<" classes)"<<std::endl;
	28	+ //load all of the training spectra into an array
	29	+ unsigned long long F_idx = 0; //initialize the matrix index to 0
	30	+ //unsigned long long R_idx = 0;
	31	+ for(unsigned c = 0; c < nC; c++){ //for each class image
	32	+ std::cout<<"\tSifting class "<<c+1<<" = "<<nP[c]<<" pixels..."<<std::endl;
	33	+ // std::thread t1 = std::thread(progress_thread_envi, &E); //start the progress bar thread
	34	+ E.sift((void*)&F[F_idx], C[c].data(), true); //sift that class into the matrix at the proper location
	35	+ F_idx += nP[c] * B;
	36	+ progress = (float)(c+1) / (float)nC * 100;
	37	+ // t1.join();
	38	+ }
	39	+
	40	+ return F;
	41	+}
	42	+
	43	+/// Load responses for a Random Forest Classifier
	44	+unsigned int* ga_load_responses(size_t tP, size_t nC, std::vector< unsigned int > nP){
	45	+ unsigned int* T = (unsigned int)malloc(tPsizeof(unsigned int)); //generate an OpenCV vector of responses
	46	+ size_t R_idx = 0; //index into the response array
	47	+ for(size_t c = 0; c < nC; c++){ //for each class image
	48	+ for(unsigned long long l = 0; l < nP[c]; l++){ //assign a response for all pixels of class c loaded in the training matrix
	49	+ T[R_idx + l] = (unsigned int)c+1;
	50	+ }
	51	+ R_idx += nP[c]; //increment the response vector index
	52	+ }
	53	+ return T;
	54	+}
	55	+
	56	+
	57	+//loads the necessary data for training a random forest classifier
	58	+std::vector< unsigned int > ga_load_class_images(int argc, stim::arglist args, size_t* nC, size_t* tP){
	59	+ if(args["classes"].nargs() < 2){ //if fewer than two classes are specified, there's a problem
	60	+ std::cout<<"ERROR: training requires at least two class masks"<<std::endl;
	61	+ exit(1);
	62	+ }
	63	+ std::vector< unsigned int > nP;
	64	+ size_t num_images = args["classes"].nargs(); //count the number of class images
	65	+ //size_t num_images = args["rf"].nargs(); //count the number of class images
	66	+ //std::vector<std::string> filenames(num_images); //initialize an array of file names to store the names of the images
	67	+ std::string filename; //allocate space to store the filename for an image
	68	+ for(size_t c = 0; c < num_images; c++){ //for each image
	69	+ filename = args["classes"].as_string(c);; //get the class image file name
	70	+ stim::image<unsigned char> image(filename); //load the image
	71	+ //push_training_image(image.channel(0), nC, tP, nP); //push channel zero (all class images are assumed to be single channel)
	72	+ C.push_back(image.channel(0));
	73	+ unsigned int npixels = (unsigned int)image.channel(0).nnz();
	74	+ nP.push_back(npixels); //push the number of pixels onto the pixel array
	75	+ *tP += npixels; //add to the running total of pixels
	76	+ nC = nC + 1;
	77	+ }
	78	+
	79	+ return nP;
	80	+}
	81	+
	82	+void display_PixelfeatureNclass(float* F, unsigned int* T, size_t B, size_t Idx){
	83	+ //display code for debug, displaying Idx th pixel from feature matrix F with all features B
	84	+ std::cout<<"class of pixel["<<Idx<<"]" <<"is: "<<T[Idx]<<std::endl;
	85	+ std::cout<<"feature["<<Idx<<"] is: "<<std::endl;
	86	+ for (size_t i = 0; i< B; i++)
	87	+ std::cout<<" "<<F[Idx * B + i];
	88	+}
	89	+
	90	+
	91	+void display_args(int argc, stim::arglist args){
	92	+ std::cout<<"number of arguments "<<argc<<std::endl;
	93	+ std::cout<<"arg 0 "<<args.arg(0)<<std::endl;
	94	+ std::cout<<"arg 1 "<<args.arg(1)<<std::endl;
	95	+}
	96	+
	97	+void display_dataSize(size_t X, size_t Y, size_t B){
	98	+ std::cout<<"number of samples "<<X*Y<<std::endl;
	99	+ std::cout<<"number of bands "<<B<<std::endl;
	100	+
	101	+}
	102	+
	103	+void display_phe(float* phe, unsigned int* P, size_t p,size_t f, size_t i, size_t j){
	104	+ //display code for debug, displaying jth pixel from new feature matrix which is created for gnome i
	105	+ std::cout<<"phe["<<i<<"]["<<j<<"]"<<std::endl;
	106	+ for(unsigned int n = 0; n < f; n++){
	107	+ std::cout<<P[i * f + n]; //spectral feature indices from gnome i of current population
	108	+ std::cout<<" "<<phe[i* (p * f) +j * f + n]<<std::endl; //display 100th pixel value corresponding to feature indices in the gnome
	109	+
	110	+ }
	111	+}
	112	+
	113	+
	114	+void display_gnome(unsigned int* P,size_t f,size_t gIdx){
	115	+ //display code for debug, displaying gnome gIdx of current population, gnome is subset of feature indices
	116	+ for (size_t i = 0; i< f; i++)
	117	+ std::cout<<" "<<P[gIdx * f + i];
	118	+}
	119	+
...	...

src/ga_gpu.cu 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/src/ga_gpu.cu
	1	+#ifndef GA_GPU_CU
	2	+#define GA_GPU_CU
	3	+
	4	+//#include <cuda.h>
	5	+//#include "cuda_runtime.h"
	6	+//#include <cuda_runtime_api.h>
	7	+//#include "device_launch_parameters.h"
	8	+#include <stim/cuda/cudatools/error.h>
	9	+
	10	+#include "timer.h"
	11	+//#include <stdio.h>
	12	+//#include <stdlib.h>
	13	+#include <iostream>
	14	+#include <fstream>
	15	+
	16	+extern Timer timer;
	17	+
	18	+
	19	+__global__ void kernel_computeSb(float* gpuSb, unsigned int* gpuP, float* gpuM, float* gpuCM, size_t ub, size_t f, size_t p, size_t nC, unsigned int* gpu_nPxInCls){
	20	+
	21	+ size_t i = blockIdx.x * blockDim.x + threadIdx.x; //gnomeindex in population matrix
	22	+ size_t j = blockIdx.y * blockDim.y + threadIdx.y; //index of feature index from gnome
	23	+ size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //if we use 3d grid then it is needed
	24	+
	25	+
	26	+ if(gnomeIndx >= p \|\| i >= f \|\| j >= f) return; //handling segmentation fault
	27	+
	28	+ //form a sb matrix from vector sbVec, multiply each element in matrix with num of pixels in the current class
	29	+ //and add it to previous value of between class scatter matrix sb
	30	+ float tempsbval;
	31	+ size_t n1;
	32	+ size_t n2;
	33	+ size_t classIndx; //class index in class mean matrix
	34	+
	35	+ for(size_t c = 0; c < nC; c++){
	36	+ tempsbval = 0;
	37	+ classIndx = c * ub;
	38	+ n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix
	39	+ n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix
	40	+ tempsbval = ((gpuCM[classIndx + n1] - gpuM[n1]) (gpuCM[classIndx + n2] - gpuM[n2])) (float)gpu_nPxInCls[c] ;
	41	+ gpuSb[gnomeIndx * f * f + j * f + i] += tempsbval;
	42	+ }
	43	+}
	44	+
	45	+
	46	+//Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
	47	+__global__ void kernel_computeSw(float* gpuSw, unsigned int* gpuP, float* gpuCM, float* gpuF, unsigned int* gpuT, size_t ub, size_t f, size_t p, size_t nC, size_t tP){
	48	+ size_t i = blockIdx.x * blockDim.x + threadIdx.x; //gnomeindex in population matrix
	49	+ size_t j = blockIdx.y * blockDim.y + threadIdx.y; //index of feature index from gnome
	50	+ size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //total number of individuals
	51	+
	52	+ if(gnomeIndx >= p \|\| i >= f \|\| j >= f) return; //handling segmentation fault
	53	+ float tempswval;
	54	+
	55	+ size_t n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix
	56	+ size_t n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix
	57	+ tempswval = 0;
	58	+ for(size_t c = 0; c < nC; c++){
	59	+ tempswval = 0;
	60	+ for(size_t k = 0; k < tP; k++){
	61	+ if(gpuT[k] == (c+1) ){
	62	+ tempswval += ((gpuF[ k * ub + n1] - gpuCM[c * ub + n1]) * (gpuF[k * ub + n2] - gpuCM[c * ub + n2]));
	63	+ }
	64	+ }
	65	+ gpuSw[gnomeIndx * f * f + j * f + i] += tempswval;
	66	+ }
	67	+}
	68	+
	69	+
	70	+
	71	+
	72	+ //=============================gpu intialization=============================================
	73	+ /// Initialize all GPU pointers used in the GA-GPU algorithm
	74	+ /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
	75	+ /// @param p is the population size
	76	+ /// @param f is the number of desired features
	77	+ /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
	78	+ /// @param cpuM is a pointer to the class mean on the CPU
	79	+ /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
	80	+ /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class
	81	+ /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
	82	+ /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
	83	+ /// @param gpuF is the destination for the GPU feature matrix
	84	+ /// @param cpuF is the complete feature matrix on the CPU
	85	+
	86	+ void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
	87	+ float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
	88	+ float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
	89	+ float gpuSb, float gpuSw,
	90	+ float** gpuF, float* cpuF,
	91	+ unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls){
	92	+
	93	+ HANDLE_ERROR(cudaMalloc(gpuP, p * f * sizeof(unsigned int))); //allocate space for the population on the GPU
	94	+
	95	+ HANDLE_ERROR(cudaMalloc(gpuCM, nC * ub * sizeof(float))); //allocate space for the class mean and copy it to the GPU
	96	+ HANDLE_ERROR(cudaMemcpy(gpuCM, cpuCM, nC ub * sizeof(float), cudaMemcpyHostToDevice));
	97	+
	98	+
	99	+ HANDLE_ERROR(cudaMalloc(gpuM, ub * sizeof(float))); //allocate space for the mean of the feature matrix
	100	+ HANDLE_ERROR(cudaMemcpy(gpuM, cpuM, ub sizeof(float), cudaMemcpyHostToDevice));
	101	+
	102	+ HANDLE_ERROR(cudaMalloc(gpu_nPxInCls, nC * sizeof(unsigned int))); //number of pixels in each class
	103	+ HANDLE_ERROR(cudaMemcpy(gpu_nPxInCls, cpu_nPxInCls, nC sizeof(unsigned int), cudaMemcpyHostToDevice));
	104	+
	105	+
	106	+ HANDLE_ERROR(cudaMalloc(gpuSb, p * f * f * sizeof(float))); //allocate memory for sb which is calculated for eery class separately and added together in different kernel
	107	+ HANDLE_ERROR(cudaMalloc(gpuSw, p * f * f * sizeof(float)));
	108	+
	109	+ HANDLE_ERROR(cudaMalloc(gpuF, tP * ub * sizeof(float)));
	110	+ HANDLE_ERROR(cudaMemcpy(gpuF, cpuF, tP ub * sizeof(float), cudaMemcpyHostToDevice));
	111	+
	112	+ HANDLE_ERROR(cudaMalloc(gpuT, tP * sizeof(unsigned int)));
	113	+ HANDLE_ERROR(cudaMemcpy(gpuT, cpuT, tP sizeof(unsigned int), cudaMemcpyHostToDevice));
	114	+
	115	+ }
	116	+
	117	+ //computation on GPU
	118	+ /// Initialize all GPU pointers used in the GA-GPU algorithm
	119	+ /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
	120	+ /// @param p is the population size
	121	+ /// @param f is the number of desired features
	122	+ /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
	123	+ /// @param cpuSb is the between-class scatter matrix on the GPU (this function will copy the GPU result there)
	124	+ /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
	125	+ /// @param cpuSw is the within-class scatter matrix on the GPU (this function will copy the GPU result there)
	126	+
	127	+ /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
	128	+ /// @param cpuM is a pointer to the class mean on the CPU
	129	+ /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
	130	+ /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class
	131	+
	132	+ /// @param gpuF is the destination for the GPU feature matrix
	133	+ /// @param cpuF is the complete feature matrix on the CPU
	134	+ void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
	135	+ float* gpuSb, float* cpuSb,
	136	+ float* gpuSw, float* cpuSw,
	137	+ float* gpuF, unsigned int* gpuT,float* gpuM, float* gpuCM,
	138	+ size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile){
	139	+
	140	+ timer.start();
	141	+ HANDLE_ERROR(cudaMemcpy(gpuP, cpuP, p * f * sizeof(unsigned int), cudaMemcpyHostToDevice));
	142	+ HANDLE_ERROR(cudaMemset(gpuSb, 0, p * f * f * sizeof(float)));
	143	+
	144	+ //grid configuration of GPU
	145	+ size_t threads = (size_t)sqrt(props.maxThreadsPerBlock);
	146	+ if(threads > f) threads = f;
	147	+ size_t numberofblocksfor_f = (size_t)ceil((float)f/ threads);
	148	+ dim3 blockdim((int)threads, (int)threads, 1);
	149	+ dim3 griddim((int)numberofblocksfor_f, (int)numberofblocksfor_f, (int)p); //X dimension blocks will cover all gnomes of the population and each block will have as many gnomes as it can feet
	150	+ //sharedbytes calculation
	151	+ size_t sharedBytes = p * f * sizeof(unsigned int); //copy population to shared memory
	152	+ if(props.sharedMemPerBlock < sharedBytes) sharedBytes = props.sharedMemPerBlock;
	153	+
	154	+ //launch kernel to compute sb matrix
	155	+ kernel_computeSb<<<griddim, blockdim, sharedBytes>>>(gpuSb, gpuP, gpuM, gpuCM, ub, f, p, nC, gpu_nPxInCls);
	156	+ cudaDeviceSynchronize();
	157	+
	158	+ HANDLE_ERROR(cudaMemcpy(cpuSb, gpuSb, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); //copy between class scatter from gpu to cpu
	159	+ const auto elapsedg1 = timer.time_elapsed();
	160	+ if(gen > gnrtn -2){
	161	+ std::cout << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
	162	+ profilefile << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
	163	+ }
	164	+
	165	+ timer.start();
	166	+ //Compute within class scatter
	167	+ HANDLE_ERROR(cudaMemset(gpuSw, 0, p * f * f * sizeof(float)));
	168	+
	169	+ //launch kernel to compute sb matrix
	170	+ kernel_computeSw<<<griddim, blockdim>>>(gpuSw, gpuP, gpuCM, gpuF, gpuT, ub, f, p, nC, tP);
	171	+ cudaDeviceSynchronize();
	172	+ //copy between class scatter from gpu to cpu
	173	+ HANDLE_ERROR(cudaMemcpy(cpuSw, gpuSw, p * f * f * sizeof(float), cudaMemcpyDeviceToHost));
	174	+ const auto elapsedg2 = timer.time_elapsed();
	175	+ if(gen > gnrtn - 2){
	176	+ std::cout << "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
	177	+ profilefile<< "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
	178	+ }
	179	+
	180	+ }
	181	+
	182	+ //free all gpu pointers
	183	+ void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT){
	184	+
	185	+ HANDLE_ERROR(cudaFree(gpuP));
	186	+ HANDLE_ERROR(cudaFree(gpuCM));
	187	+ HANDLE_ERROR(cudaFree(gpuM));
	188	+ HANDLE_ERROR(cudaFree(gpu_nPxInCls));
	189	+ HANDLE_ERROR(cudaFree(gpuSb));
	190	+ HANDLE_ERROR(cudaFree(gpuSw));
	191	+ HANDLE_ERROR(cudaFree(gpuF));
	192	+ HANDLE_ERROR(cudaFree(gpuT));
	193	+ }
	194	+
	195	+#endif
	196	+
...	...

src/ga_gpu.h 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/src/ga_gpu.h
	1	+#ifndef GA_GPU_H
	2	+#define GA_GPU_H
	3	+
	4	+#include <iostream>
	5	+#include <thread>
	6	+#include <complex>
	7	+#include <cv.h>
	8	+#include <stdio.h>
	9	+#include <stdlib.h>
	10	+#include <iostream>
	11	+
	12	+#include "timer.h"
	13	+
	14	+#include "basic_functions.h"
	15	+//LAPACKE support for Visual Studio
	16	+
	17	+#ifndef LAPACK_COMPLEX_CUSTOM
	18	+#define LAPACK_COMPLEX_CUSTOM
	19	+#define lapack_complex_float std::complex<float>
	20	+#define lapack_complex_double std::complex<double>
	21	+#include "lapacke.h"
	22	+#endif
	23	+
	24	+
	25	+#define LAPACK_ROW_MAJOR 101
	26	+#define LAPACK_COL_MAJOR 102
	27	+
	28	+//CUDA functions
	29	+void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
	30	+ float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
	31	+ float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
	32	+ float gpuSb, float gpuSw,
	33	+ float** gpuF, float* cpuF,
	34	+ unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls);
	35	+void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
	36	+ float* gpuSb, float* cpuSb,
	37	+ float* gpuSw, float* cpuSw,
	38	+ float* gpuF, unsigned int* T, float* gpuM, float* gpuCM,
	39	+ size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile);
	40	+void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT);
	41	+
	42	+struct _fcomplex { float re, im; };
	43	+typedef struct _fcomplex fcomplex;
	44	+
	45	+Timer timer;
	46	+
	47	+class ga_gpu {
	48	+
	49	+public:
	50	+ float* F; //pointer to the raw data in host memory
	51	+ unsigned int* T; //pointer to the class labels in host memory
	52	+ size_t gnrtn; //total number of generations
	53	+ size_t p; //population size
	54	+ size_t f; // number of features to be selected
	55	+
	56	+ unsigned int* P; //pointer to population of current generation genotype matrix (p x f)
	57	+ float* S; //pointer to score(fitness value) of each gnome from current population matric P
	58	+ unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population
	59	+ unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F)
	60	+ unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0)
	61	+ float uniformRate;
	62	+ float mutationRate;
	63	+ size_t tournamentSize; //number of potential gnomes to select parent for crossover
	64	+ bool elitism; //if true then passes best gnome to next generation
	65	+
	66	+ //declare gpu pointers
	67	+ float* gpuF; //Feature matrix
	68	+ unsigned int* gpuT; //target responses of entire feature matrix
	69	+ unsigned int* gpuP; //population matrix
	70	+ unsigned int* gpu_nPxInCls;
	71	+ float* gpuCM; //class mean of entire feature matrix
	72	+ float* gpuM; //total mean of entire feature matrix
	73	+ float* gpuSb; //between class scatter for all individuals of current population
	74	+ float* gpuSw; //within class scatter for all individuals of current population
	75	+
	76	+ //constructor
	77	+ ga_gpu() {}
	78	+
	79	+ //==============================generate initial population
	80	+
	81	+ void initialize_population(std::vector<unsigned int> i_guess, bool debug) {
	82	+ if (debug) {
	83	+ std::cout << std::endl;
	84	+ std::cout << "initial populatyion is: " << std::endl;
	85	+ }
	86	+
	87	+ lb = 0;
	88	+ P = (unsigned int)calloc(p f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f
	89	+ S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P
	90	+
	91	+ srand(1);
	92	+ //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess
	93	+ std::memcpy(P, i_guess.data(), f * sizeof(unsigned int));
	94	+
	95	+ //generate random initial population
	96	+ for (size_t i1 = 1; i1 < p; i1++) {
	97	+ for (size_t i2 = 0; i2 < f; i2++) {
	98	+ P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B)
	99	+ if (debug) std::cout << P[i1 * f + i2] << "\t";
	100	+ }
	101	+ if (debug) std::cout << std::endl;
	102	+ }
	103	+ }
	104	+
	105	+ //===================generation of new population==========================================
	106	+
	107	+ size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) {
	108	+
	109	+ //gget index of best gnome in the current population
	110	+ size_t bestG_Indx = gIdxbestGnome();
	111	+ //-------------(reproduction)-------
	112	+ if (elitism) {
	113	+ saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation
	114	+ }
	115	+ // ------------Crossover population---------------
	116	+ int elitismOffset;
	117	+ if (elitism) {
	118	+ elitismOffset = 1;
	119	+ }
	120	+ else {
	121	+ elitismOffset = 0;
	122	+ }
	123	+
	124	+ //Do crossover for rest of population size
	125	+ for (int i = elitismOffset; i <p; i++) {
	126	+ // std::cout<<"crossover of gnome "<<i<<std::endl;
	127	+ std::vector<unsigned int>gnome1;
	128	+ gnome1.reserve(f);
	129	+ gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
	130	+ // displaygnome(gnome1);
	131	+ std::vector<unsigned int>gnome2;
	132	+ gnome2.reserve(f);
	133	+ gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
	134	+ // displaygnome(gnome2);
	135	+ std::vector<unsigned int>gnome;
	136	+ gnome.reserve(f);
	137	+ gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome
	138	+ // displaygnome(gnome);
	139	+ saveGnome(i, gnome, newPop); //save crosseover result to new population
	140	+ }
	141	+
	142	+ //--------------Mutate population------------
	143	+ // introduce some mutation in new population
	144	+ for (int i = elitismOffset; i <p; i++) {
	145	+ //std::cout<<"mutation of gnome"<<std::endl;
	146	+ std::vector<unsigned int>gnome;
	147	+ gnome.reserve(f);
	148	+
	149	+ for (size_t n = 0; n < f; n++)
	150	+ gnome.push_back(newPop[i*f + n]);
	151	+ //std::cout<<"\n starting address "<<(&newPop[0] + if)<<"\t end address is "<<(&newPop[0] + if + f-1) <<std::endl;
	152	+ //std::copy((&newPop[0] + if), (&newPop[0] + if +f-1), gnome.begin());
	153	+ // displaygnome(gnome);
	154	+ mutate(gnome);
	155	+ // displaygnome(gnome);
	156	+ saveGnome(i, gnome, newPop); //save new gnome to new population at position i
	157	+ }
	158	+ return bestG_Indx;
	159	+ }
	160	+
	161	+ //============================== functions for population evolution ===========================================================================
	162	+ std::vector<unsigned int> tournamentSelection(size_t tSize) {
	163	+ // Create a tournament population
	164	+ unsigned int* tournamentP = (unsigned int)malloc(tSize f * sizeof(unsigned int));
	165	+ std::vector<float>tournamentS;
	166	+
	167	+ // For each place in the tournament get a random individual
	168	+ for (size_t i = 0; i < tSize; i++) {
	169	+ size_t rndmIdx = rand() % p + lb;
	170	+ tournamentS.push_back(S[rndmIdx]);
	171	+ //for (size_t n = 0; n <f; n++)
	172	+ //tournamentP[i * f + n] = (getGnome(rndmIdx)).at(n);
	173	+ std::vector<unsigned int> temp_g(getGnome(rndmIdx));
	174	+ std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f);
	175	+ }
	176	+ // Get the fittest
	177	+ std::vector<unsigned int>fittestgnome;
	178	+ fittestgnome.reserve(f);
	179	+
	180	+ //select index of best gnome from fitness score
	181	+ size_t bestSIdx = 0;
	182	+ for (size_t i = 0; i < tSize; i++) {
	183	+ if (tournamentS[i] < tournamentS[bestSIdx])
	184	+ bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error
	185	+ }
	186	+
	187	+ for (size_t n = 0; n < f; n++)
	188	+ fittestgnome.push_back(tournamentP[bestSIdx * f + n]);
	189	+ return fittestgnome;
	190	+ } //end of tournament selection
	191	+
	192	+
	193	+ std::vector<unsigned int> crossover(std::vector<unsigned int> gnome1, std::vector<unsigned int> gnome2, float* M) {
	194	+ std::vector<unsigned int> gnome;
	195	+ for (size_t i = 0; i < f; i++) {
	196	+ // Crossover
	197	+ float r = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
	198	+ if (r <= uniformRate) {
	199	+ gnome.push_back(gnome1.at(i));
	200	+ }
	201	+ else {
	202	+ gnome.push_back(gnome2.at(i));
	203	+ }
	204	+ }
	205	+
	206	+ //check new gnome for all zero bands and duplicated values
	207	+ std::vector<unsigned int> gnomeunique;
	208	+ int flag = 0;
	209	+ std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
	210	+ std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique));
	211	+ /* if(gnomeunique.size()< gnome.size()){
	212	+ flag = 1;
	213	+ std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
	214	+ }*/
	215	+ unsigned int featureband, featureband1, featureband2;
	216	+ if (gnomeunique.size() < f) {
	217	+ for (size_t k = gnomeunique.size(); k < f; k++) {
	218	+ featureband = rand() % ub + lb;
	219	+ for (size_t i = 0; i < f; i++) {
	220	+ featureband1 = gnome1.at(i);
	221	+ featureband2 = gnome2.at(i);
	222	+ for (size_t j = 0; j < gnomeunique.size(); j++) {
	223	+ if (gnomeunique.at(j) != featureband1) {
	224	+ featureband = featureband1;
	225	+ }
	226	+ else if (gnomeunique.at(j) != featureband2) {
	227	+ featureband = featureband2;
	228	+ }
	229	+ else if (gnomeunique.at(j) == featureband) {
	230	+ featureband = rand() % ub + lb;
	231	+ while (M[featureband] == 0) {
	232	+ featureband = rand() % ub + lb;
	233	+ }
	234	+ }
	235	+ }
	236	+ }
	237	+ gnomeunique.push_back(featureband);
	238	+ }
	239	+ }
	240	+ //if(flag ==1){
	241	+ // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
	242	+ // for(int k = 0; k < gnome.size(); k++)
	243	+ // std::cout<<gnome[k]<<"\t";
	244	+ // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
	245	+ // for(int k = 0; k < gnomeunique.size(); k++)
	246	+ // std::cout<<gnomeunique[k]<<"\t";
	247	+ //}
	248	+
	249	+ return gnomeunique;
	250	+ }
	251	+
	252	+ void mutate(std::vector<unsigned int> gnome) {
	253	+ for (size_t i = 0; i < f; i++) {
	254	+ float LO = (float)0.01;
	255	+ float HI = 1;
	256	+ float r3 = LO + static_cast <float> (rand()) / (static_cast <float> (RAND_MAX / (HI - LO)));
	257	+ //if random value is less than mutationRate then mutate this gnome
	258	+ if (r3 <= mutationRate) {
	259	+ gnome.at(i) = (rand() % ub + lb);
	260	+ gnome.push_back(rand() % ub + lb);
	261	+ }
	262	+ }
	263	+ }
	264	+
	265	+ ///returns gnome of given index
	266	+ std::vector<unsigned int> getGnome(size_t idx) {
	267	+ std::vector<unsigned int> gnome;
	268	+ gnome.reserve(f);
	269	+ //pulling gnome idx from population P
	270	+ for (size_t n = 0; n < f; n++)
	271	+ gnome.push_back(P[idx * f + n]);
	272	+ //memcpy(&gnome[0], P+idxf, fsizeof(size_t));
	273	+ return gnome;
	274	+ }
	275	+
	276	+ //save gnome of index gIdx from previous population at position i in the new population
	277	+ void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) {
	278	+ for (size_t n = 0; n < f; n++)
	279	+ newPop[i * f + n] = P[gIdx * f + n];
	280	+ }
	281	+
	282	+ void saveGnome(size_t idx, std::vector<unsigned int>gnome, unsigned int* newPop) {
	283	+ std::copy(gnome.begin(), gnome.end(), newPop + idx*f);
	284	+ }
	285	+
	286	+ size_t gIdxbestGnome() {
	287	+ //std::cout<<"best gnome indes is: "<<sortSIndx()[0];
	288	+ return sortSIndx()[0];
	289	+ }
	290	+
	291	+ void displaygnome(std::vector<unsigned int> gnome) {
	292	+ std::cout << "\t gnome: ";
	293	+ for (int i = 0; i<gnome.size(); ++i)
	294	+ std::cout << gnome[i] << ' ';
	295	+ std::cout << std::endl;
	296	+ }
	297	+
	298	+ //---------------------post processing of score-------------------------------------
	299	+ void Snorm() { //normalize gnome scores
	300	+ double s;
	301	+ for (size_t i = 0; i < p; i++) {
	302	+ s += S[i]; //sum of all gnome score in population
	303	+ }
	304	+ //std::cout<<"mean Score is: "<<(double) s/p;
	305	+ for (size_t i = 0; i <p; i++)
	306	+ S[i] = S[i] / s;
	307	+ }
	308	+
	309	+ size_t* sortSIndx() { //sort gnome index according to gnome scores
	310	+ //sort indices of score in ascending order (fitness value)
	311	+ size_t idx = (size_t)malloc(p * sizeof(size_t)); //array to hold sorted gnome index
	312	+ for (size_t i = 0; i < p; i++) { //initialize index array from 1 to p(population size) in an ascending order
	313	+ idx[i] = i;
	314	+ }
	315	+
	316	+ for (size_t i = 0; i<p; i++) { //sort gnome indices according to score values using bubble sort
	317	+ for (size_t j = i + 1; j<p; j++) {
	318	+ if (S[idx[i]] > S[idx[j]]) {
	319	+ std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
	320	+ }
	321	+ }
	322	+ }
	323	+
	324	+ //display best gnome
	325	+ //std::cout << "best fitness value: " << S[idx[0]] << std::endl;
	326	+ /*if (S[idx[0]] < 0) {
	327	+ std::cout << "best gnome is " << std::endl;
	328	+ for (size_t i = 0; i < f; i++)
	329	+ std::cout << P[f * idx[0] + i] << ", ";
	330	+ std::cout << std::endl;
	331	+ }*/
	332	+
	333	+ return idx; //use as sortSIdx in selection
	334	+ }
	335	+
	336	+
	337	+ //size_t* sortIndx(float* input, size_t size) {
	338	+ // //sort indices of score in ascending order (fitness value)
	339	+ // size_t *idx;
	340	+ // idx = (size_t)malloc(size sizeof(size_t));
	341	+ // for (size_t i = 0; i < size; i++)
	342	+ // idx[i] = i;
	343	+
	344	+ // for (size_t i = 0; i<size; i++) {
	345	+ // for (size_t j = i + 1; j<size; j++) {
	346	+ // if (input[idx[i]] < input[idx[j]]) {
	347	+ // std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
	348	+ // }
	349	+ // }
	350	+ // }
	351	+ // return idx; //use as sortSIdx in selection
	352	+
	353	+ //}
	354	+
	355	+ void generateNewP(unsigned int* newPop) {
	356	+ //std::memcpy(P, 0 , p * f *sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
	357	+ std::memcpy(P, newPop, p * f * sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
	358	+ }
	359	+
	360	+ //============================== functions for fitness function ===========================================================================
	361	+ //compute total mean M (1 X B) of all features (tP X B)
	362	+ void ttlMean(float* M, size_t tP, size_t B) {
	363	+ //std::cout<<"total number of pixels are "<<tP<<std::endl;
	364	+ for (int k = 0; k < tP; k++) { //total number of pixel in feature matrix
	365	+ for (size_t n = 0; n < B; n++) { // index of feature in ith gnome
	366	+ M[n] += F[k * B + n];
	367	+ }
	368	+ }
	369	+ for (size_t n = 0; n < B; n++) //take an avarage of above summation
	370	+ M[n] = M[n] / (float)tP;
	371	+ }
	372	+
	373	+ void dispalymean(float* M) { //display mean
	374	+ std::cout << std::endl;
	375	+ std::cout << "Total mean of gnome 1 features are is " << std::endl;
	376	+
	377	+ for (size_t i = 0; i < 1; i++) {
	378	+ for (size_t j = 0; j < f; j++) {
	379	+ size_t index = P[i*f + j];
	380	+ std::cout << "feature index " << index << "\t total mean" << M[index] << std::endl;
	381	+ }
	382	+ }
	383	+ std::cout << std::endl;
	384	+ }
	385	+
	386	+ //Compute class means cM (p x nC x f) of all gnome features phe(tP x f)
	387	+ void classMean(float* cM, size_t tP, size_t nC, size_t B, std::vector<unsigned int> nPxInCls) {
	388	+ for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses
	389	+ float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c'
	390	+ for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix
	391	+ if (T[k] == c + 1) { //class numbers start from 1 not 0
	392	+ for (size_t n = 0; n < B; n++) { //total number of features in a gnome
	393	+ tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome
	394	+ }
	395	+ }
	396	+ }
	397	+ for (size_t n = 0; n < B; n++)
	398	+ cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c'
	399	+
	400	+ }
	401	+
	402	+ }
	403	+
	404	+ //display class mean
	405	+ void dispalyClassmean(float* cM, size_t nC) {
	406	+ std::cout << std::endl;
	407	+ std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl;
	408	+ for (size_t i = 0; i < 1; i++) {
	409	+ for (size_t c = 0; c < nC; c++) {
	410	+ for (size_t j = 0; j < f; j++) {
	411	+ size_t index = P[i*f + j];
	412	+
	413	+ std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl;
	414	+ }
	415	+ }
	416	+ }
	417	+ std::cout << std::endl;
	418	+ }
	419	+
	420	+ //-----------------------------------------between and within class Scattering computation---------------------------------------------------------------
	421	+ //computation on CPU
	422	+ void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector<unsigned int> nPxInCls) {
	423	+ timer.start();
	424	+ computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU
	425	+ const auto elapsed = timer.time_elapsed();
	426	+ std::cout << "Sb CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << "us" << std::endl;
	427	+
	428	+ timer.start();
	429	+ computeSw(sw, cM, nC, tP); //compute within class scatter on CPU
	430	+ const auto elapsed1 = timer.time_elapsed();
	431	+ std::cout << "Sw CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
	432	+ }
	433	+
	434	+ //display between class scatter
	435	+ void displaySb(float* sb) {
	436	+ std::cout << "between scatter is " << std::endl;
	437	+ for (size_t g = 0; g<1; g++) {
	438	+ std::cout << std::endl;
	439	+ for (size_t j = 0; j < f; j++) { //total number of features in a gnome
	440	+ for (size_t k = 0; k < f; k++) { //total number of features in a gnome
	441	+ std::cout << sb[g * f * f + j * f + k] << " ";
	442	+ }
	443	+ std::cout << std::endl;
	444	+ }
	445	+ }
	446	+ std::cout << std::endl;
	447	+ }
	448	+
	449	+ //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f)
	450	+ void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector<unsigned int> nPxInCls) {
	451	+ float tempsbval;
	452	+ size_t n1;
	453	+ size_t n2;
	454	+ size_t classIndx; //class index in class mean matrix
	455	+ /*std::cout <<"population of computation of cpusb "<< std::endl;
	456	+ for (size_t i2 = 0; i2 < f; i2++) {
	457	+ std::cout << P[i2] << "\t";
	458	+ }*/
	459	+
	460	+ for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
	461	+ for (size_t c = 0; c < nC; c++) {
	462	+ for (size_t i = 0; i < f; i++) {
	463	+ for (size_t j = 0; j < f; j++) {
	464	+ tempsbval = 0;
	465	+ classIndx = c * ub;
	466	+ n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix
	467	+ n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix
	468	+ // std::cout << "i: " << i << " j: " <<j<< " n1: " << n1 << " n2:" << n2 << std::endl;
	469	+ tempsbval = ((cM[classIndx + n1] - M[n1]) *(cM[classIndx + n2] - M[n2]));
	470	+ sb[gnomeIndx * f * f + i * f + j] += tempsbval * (float)nPxInCls[c]; // compute tempsb[j][k] element of class 'c' of gnome 'i'
	471	+ }
	472	+ }
	473	+ }
	474	+ }
	475	+
	476	+ }
	477	+
	478	+ //Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
	479	+ void computeSw(float* sw, float* cM, size_t nC, size_t tP) {
	480	+ float tempswval;
	481	+ size_t n1;
	482	+ size_t n2;
	483	+ size_t cMclass; //class index in class mean matrix
	484	+ size_t Pg;
	485	+ size_t swg;
	486	+ size_t pheg;
	487	+ for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
	488	+ Pg = gnomeIndx * f;
	489	+ swg = gnomeIndx * f * f;
	490	+ pheg = gnomeIndx * tP * f;;
	491	+ for (size_t c = 0; c < nC; c++) {
	492	+ cMclass = c * ub;
	493	+
	494	+ for (size_t k = 0; k < tP; k++) {
	495	+ if (T[k] == (c + 1)) {
	496	+ for (size_t i = 0; i < f; i++) {
	497	+ for (size_t j = 0; j < f; j++) {
	498	+ n1 = P[Pg + i]; //actual feature index in original feature matrix
	499	+ n2 = P[Pg + j]; //actual feature index in original feature matrix
	500	+
	501	+ tempswval = 0;
	502	+ tempswval = ((F[k * ub + n1] - cM[cMclass + n1]) * (F[k * ub + n2] - cM[cMclass + n2]));
	503	+ //tempswval = ((phe[gnomeIndx * tP * f + k * f + i] - cM[c * ub + P[gnomeIndx * f + i]]) * (phe[gnomeIndx * tP f + k f + j] - cM[c * ub + P[gnomeIndx * f + j]]));
	504	+ sw[gnomeIndx * f * f + i * f + j] += tempswval;
	505	+ }
	506	+ }
	507	+ }
	508	+ }
	509	+ }
	510	+
	511	+ }
	512	+ }
	513	+ //checking bands with all zeros and replacing duplicated bands in gnome but this function is only for initial population
	514	+ //void zerobandcheck(float* M, bool initial) {
	515	+ // for (size_t g = 0; g < p; g++) { // for each gnome
	516	+ // for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
	517	+ // while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
	518	+ // P[g * f + i] = rand() % ub + lb;
	519	+ // }
	520	+ // }
	521	+ // //checking for duplicats in a gnome
	522	+ // std::vector<unsigned int> gnome = getGnome(g);
	523	+ // std::vector<unsigned int> gnomeunique;
	524	+ // int flag = 0; //flag will be set if gnome has duplicated band (feature) index
	525	+ // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
	526	+ // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies
	527	+ // if (gnomeunique.size()< gnome.size()) {
	528	+ // flag = 1; //set flag for those if there are duplicated indices
	529	+ // //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
	530	+ // }
	531	+
	532	+ // //adding extra random feature indices to unique copy of gnome to achive gnome size = f
	533	+ // if (gnomeunique.size() < f) {
	534	+ // for (size_t k = gnomeunique.size(); k < f; k++) {
	535	+ // unsigned int rnumber = rand() % ub + lb;
	536	+ // //check if this randomaly generated number is already present in that gnome or not
	537	+ // for (size_t j = 0; j < gnomeunique.size(); j++) {
	538	+ // if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
	539	+ // rnumber = rand() % ub + lb;
	540	+ // j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
	541	+ // }
	542	+ // }
	543	+ // gnomeunique.push_back(rnumber); //add feature index to gnomeunique
	544	+ // }
	545	+ // }
	546	+ // std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f);
	547	+ // }
	548	+ //}
	549	+
	550	+ //checking bands with all zeros and replacing duplicated bands in gnome
	551	+ void zerobandcheck(float* M, bool initialPop) {
	552	+ size_t startgnome;
	553	+ if (initialPop) {
	554	+ startgnome = 0; //for initial population check all gnomes
	555	+ }
	556	+ else {
	557	+ startgnome = 1; //for next generations start gnome check after elite children offset
	558	+ }
	559	+ for (size_t g = startgnome; g < p; g++) { // for each gnome except
	560	+
	561	+ for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
	562	+ while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
	563	+ P[g * f + i] = rand() % ub + lb;
	564	+ }
	565	+ }
	566	+ //checking for duplicats in a gnome
	567	+ std::vector<unsigned int> gnome = getGnome(g); //get current gnome g from population matrix P
	568	+ std::vector<unsigned int> gnomeunique; //array to store only unique band indicies in a genome
	569	+ int flag = 0; //flag will be set if gnome has duplicated band (feature) index
	570	+ std::sort(gnome.begin(), gnome.end()); //sort current gnome
	571	+ std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome
	572	+ if (gnomeunique.size()< gnome.size()) {
	573	+ flag = 1; //set flag for those if there are duplicated indices
	574	+ //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
	575	+ }
	576	+
	577	+ //adding extra random feature indices to unique copy of gnome to achive gnome size = f
	578	+ if (gnomeunique.size() < f) {
	579	+ for (size_t k = gnomeunique.size(); k < f; k++) {
	580	+ unsigned int rnumber = rand() % ub + lb;
	581	+ //check if this randomaly generated number is already present in that gnome or not
	582	+ for (size_t j = 0; j < gnomeunique.size(); j++) {
	583	+ if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
	584	+ rnumber = rand() % ub + lb; //generate random number between upper bound and lower bound (ub. lb)
	585	+ j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
	586	+ }
	587	+ }
	588	+ gnomeunique.push_back(rnumber); //add feature index to gnomeunique
	589	+ }
	590	+ }
	591	+
	592	+ //diplay loop only if gnome has duplicated indices
	593	+ //if(flag ==1){
	594	+ // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
	595	+ // for(int k = 0; k < gnome.size(); k++)
	596	+ // std::cout<<gnome[k]<<"\t";
	597	+ // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
	598	+ // for(int k = 0; k < gnomeunique.size(); k++)
	599	+ // std::cout<<gnomeunique[k]<<"\t";
	600	+ //}
	601	+ std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f); //copy new gnome without any duplicate band index at current gnome location
	602	+ }
	603	+ }
	604	+
	605	+
	606	+
	607	+ //gpu calling functions
	608	+ //gpu initialization (allocating space for all array on GPU)
	609	+ void gpuInitializationfrommain(float* cpuM, float* cpuCM, std::vector<unsigned int>cpu_nPxInCls, size_t tP, size_t nC) {
	610	+ // call gpuInitialization(......) with all of the necessary parameters
	611	+ gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]);
	612	+
	613	+ }
	614	+
	615	+ //Computation of between class scatter and within class scatter in GPU
	616	+ void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) {
	617	+ //calling function for SW and Sb computation and passing necessary arrays for computation
	618	+ // std::cout<<"gpu function calling"<<std::endl;
	619	+ gpucomputeSbSw(gpuP, P, p, f, gpuSb, cpuSb, gpuSw, cpuSw, gpuF, gpuT, gpuM, gpuCM, nC, tP, props, gen, gnrtn, ub, gpu_nPxInCls, profilefile);
	620	+
	621	+ //display computed Sb and Sw if debug is set
	622	+ if (debug) {
	623	+ std::cout << "From GA-GPU class: gpu results of Sb sn Sw" << std::endl;
	624	+ displayS(cpuSb, f); //display Sb
	625	+ displayS(cpuSw, f); //display Sw
	626	+ std::cout << std::endl;
	627	+ }
	628	+ }
	629	+
	630	+ //call function to free gpu pointers
	631	+ //free all gpu pointers
	632	+ void gpu_Destroy() {
	633	+ gpuDestroy(gpuP, gpuCM, gpuM, gpu_nPxInCls, gpuSb, gpuSw, gpuF, gpuT);
	634	+ }
	635	+
	636	+ //Write a destructor here
	637	+ ~ga_gpu() {
	638	+
	639	+ if (F != NULL) std::free(F); //not sure about this as it is only for 2nd constructor
	640	+ if (T != NULL) std::free(T); //same as above
	641	+ if (P != NULL) std::free(P); //not sure about this as it is only for 2nd constructor
	642	+ if (S != NULL) std::free(S); //same as above
	643	+ //if(i_guess!=NULL) std::free(i_guess); //same as above
	644	+ //HANDLE_ERROR(cudaDeviceReset());
	645	+
	646	+ }
	647	+ };
	648	+
	649	+#endif
...	...

src/main.cpp 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/src/main.cpp
	1	+#include <iostream>
	2	+
	3	+//stim libraries
	4	+#include <stim/envi/envi.h>
	5	+#include <stim/image/image.h>
	6	+#include <stim/ui/progressbar.h>
	7	+#include <stim/parser/filename.h>
	8	+#include <stim/parser/table.h>
	9	+#include <stim/parser/arguments.h>
	10	+//input arguments
	11	+stim::arglist args;
	12	+#include <fstream>
	13	+#include <thread>
	14	+#include <random>
	15	+#include <vector>
	16	+#include <math.h>
	17	+#include <limits>
	18	+
	19	+#define NOMINMAX
	20	+
	21	+
	22	+
	23	+//GA
	24	+#include "ga_gpu.h"
	25	+#include "enviload.h"
	26	+
	27	+
	28	+//envi input file and associated parameters
	29	+stim::envi E; //ENVI binary file object
	30	+unsigned int B; //shortcuts storing the spatial and spectral size of the ENVI image
	31	+//mask and class information used for training
	32	+//std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
	33	+std::vector<unsigned int> nP; //array holds the number of pixels in each mask: nP[m] is the number of pixels in mask m
	34	+size_t nC = 0; //number of classes
	35	+size_t tP = 0; //total number of pixels in all masks: tP = nP[0] + nP[1] + ... + nP[nC]
	36	+float* fea;
	37	+
	38	+//ga_gpu class object
	39	+ga_gpu ga;
	40	+bool debug;
	41	+bool binaryClass;
	42	+int binClassOne;
	43	+
	44	+//creating struct to pass to thread functions as it limits number of arguments to 3
	45	+typedef struct {
	46	+ float* S;
	47	+ float* Sb;
	48	+ float* Sw;
	49	+ float* lda;
	50	+}gnome;
	51	+gnome gnom;
	52	+
	53	+
	54	+void gpuComputeEignS( size_t g, size_t fea){
	55	+ //eigen value computation will return r = (nC-1) eigen vectors so new projected data will have dimension of r rather than f
	56	+ // std::thread::id this_id = std::this_thread::get_id();
	57	+ // std::cout<<"thread id is "<< this_id<<std::endl;
	58	+ size_t f = fea;
	59	+ //std::thread::id g = std::this_thread::get_id();
	60	+ float* LeftEigVectors_a = (float) malloc(f f * sizeof(float));
	61	+ float* gSw_a = (float) malloc(f f * sizeof(float)); //copy of between class scatter
	62	+ std::memcpy(gSw_a, &gnom.Sw[g * f * f], f * f *sizeof(float));
	63	+ if(debug){
	64	+ std::cout<<"From Eigen function: Sb and Sw "<<std::endl;
	65	+ displayS(gSw_a, f); //display Sb
	66	+ displayS(&gnom.Sb[g * f * f], f); //display Sw
	67	+ std::cout<<std::endl;
	68	+ }
	69	+
	70	+ std::vector<unsigned int> features = ga.getGnome(g);
	71	+ std::vector<unsigned int> featuresunique;
	72	+ int flag = 0;
	73	+ std::sort(features.begin(), features.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
	74	+ std::unique_copy(features.begin(), features.end(), std::back_inserter(featuresunique));
	75	+ if(featuresunique.size()< features.size()){
	76	+ f = featuresunique.size();
	77	+ }
	78	+
	79	+ size_t r = nC-1; //LDA projected dimension (limited to number of classes - 1 by rank)
	80	+ if(r > f){
	81	+ r = f;
	82	+ }
	83	+
	84	+ int info;
	85	+ float* EigenvaluesI_a = (float)malloc(f sizeof(float));
	86	+ float* Eigenvalues_a = (float)malloc(f sizeof(float));
	87	+ int IPIV = (int) malloc(sizeof(int) * f);
	88	+ //computing inverse of matrix Sw
	89	+ memset(IPIV, 0, f * sizeof(int));
	90	+ LAPACKE_sgetrf(LAPACK_COL_MAJOR, (int)f, (int)f, gSw_a, (int)f, IPIV);
	91	+ // DGETRI computes the inverse of a matrix using the LU factorization computed by DGETRF.
	92	+ LAPACKE_sgetri(LAPACK_COL_MAJOR, (int)f, gSw_a, (int)f, IPIV);
	93	+
	94	+ float* gSbSw_a = (float)calloc(f f, sizeof(float));
	95	+ //mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f * sizeof(float)], f, f, f,f);
	96	+ mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f], f, f, f,f);
	97	+ if(debug){
	98	+ std::cout<<"From Eigen function: inverse of sw and ratio of sb and sw (Sb/Sw)";
	99	+ displayS(gSw_a, f); //display inverse of Sw (1/Sw)
	100	+ displayS(gSbSw_a, f); //display ratio of Sb and Sw (Sb/Sw)
	101	+ }
	102	+
	103	+ //compute left eigenvectors for current gnome from ratio of between class scatter and within class scatter: Sb/Sw
	104	+ info = LAPACKE_sgeev(LAPACK_COL_MAJOR, 'V', 'N', (int)f, gSbSw_a, (int)f, Eigenvalues_a, EigenvaluesI_a, LeftEigVectors_a, (int)f, 0, (int)f);
	105	+ //sort eignevalue indices in descending order
	106	+ size_t* sortedindx = sortIndx(Eigenvalues_a, f);
	107	+ //displayS(LeftEigVectors_a, f); //display Eignevectors (Note these are -1 * matlab eigenvectors does not change fitness score results but keep in mind while projecting data on it)
	108	+ //sorting left eigenvectors (building forward transformation matrix As)
	109	+ for (size_t rowE = 0; rowE < r; rowE++){
	110	+ for (size_t colE = 0; colE < f; colE++){
	111	+ size_t ind1 = g * r * f + rowE * f + colE;
	112	+ //size_t ind1 = rowE * f + colE;
	113	+ size_t ind2 = sortedindx[rowE] * f + colE; //eigenvector as row vector
	114	+ gnom.lda[ind1] = LeftEigVectors_a[ind2];
	115	+ }
	116	+ }
	117	+
	118	+ if(debug){
	119	+ std::cout<<"Eigenvalues are"<<std::endl;
	120	+ for(size_t n = 0 ; n < f; n ++){
	121	+ std::cout << Eigenvalues_a[n] << ", " ;
	122	+ }
	123	+ std::cout<< std::endl;
	124	+ std::cout<<"From Eigen function: Eignevector"<<std::endl;
	125	+
	126	+ std::cout<<"LDA basis is "<<std::endl;
	127	+ std::cout << "r is " << r << std::endl;
	128	+ for(size_t l = 0 ; l < r; l++){
	129	+ for(size_t n = 0 ; n < f; n ++){
	130	+ std::cout << gnom.lda[g * l * f + l * f + n] << ", " ;
	131	+ }
	132	+ std::cout<<std::endl;
	133	+ }
	134	+
	135	+ }
	136	+ //Extract only r eigne vectors as a LDA projection basis
	137	+ float* tempgSb = (float)calloc(r f, sizeof(float));
	138	+ //mtxMul(tempgSb, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sb[g * f * f * sizeof(float)], r, f, f,f);
	139	+ //mtxMul(tempgSb, &lda[g * r * f ], gSb, r, f, f,f);
	140	+ mtxMul(tempgSb, &gnom.lda[g * r * f], &gnom.Sb[g * f * f], r, f, f,f);
	141	+ float* nSb = (float)calloc(r r, sizeof(float));
	142	+ mtxMultranspose(nSb, tempgSb, &gnom.lda[g * r * f], r, f, r, f);
	143	+
	144	+ float* tempgSw = (float)calloc(r f, sizeof(float));
	145	+ //mtxMul(tempgSw, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sw[g * f * f * sizeof(float)], r, f, f,f);
	146	+ mtxMul(tempgSw, &gnom.lda[g * r * f], &gnom.Sw[g * f * f], r, f, f,f);
	147	+ float* nSw = (float)calloc(r r, sizeof(float));
	148	+ mtxMultranspose(nSw, tempgSw, &gnom.lda[g * r * f], r, f, r, f);
	149	+ if(debug){
	150	+ std::cout<<"From Eigen function: projected Sb sn Sw"<<std::endl;
	151	+ displayS(nSb, r); //display Sb
	152	+ displayS(nSw, r); //display Sw
	153	+ std::cout<<std::endl;
	154	+ }
	155	+
	156	+ cv::Mat newSw = cv::Mat((int)r, (int)r, CV_32FC1, nSw); //within scatter of gnome g in the population
	157	+ cv::Mat newSb = cv::Mat((int)r, (int)r, CV_32FC1, nSb); //within scatter of gnome g in the population
	158	+
	159	+ //fisher's ratio from ratio of projected sb and sw
	160	+ float fisherRatio = cv::determinant(newSb) /cv::determinant(newSw);
	161	+ gnom.S[g] = 1/fisherRatio;
	162	+ if (debug) {
	163	+ std::cout<<"Score["<<g<<"]: "<< gnom.S[g]<<std::endl;
	164	+
	165	+ std::cout << "best gnoem is " << std::endl;
	166	+ for (size_t i = 0; i < f; i++)
	167	+ std::cout << ga.P[ga.f * g + i] << ", ";
	168	+ std::cout << std::endl;
	169	+ }
	170	+// if(!isfinite(gnom.S[g])){
	171	+// std::cout<<"-----------------------------------------------"<<std::endl;
	172	+// std::cout<<"Displaying intermediate values of gnome for which score is non finite"<<std::endl;
	173	+// std::cout<<"population "<<std::endl;
	174	+// for(int i = 0; i < ga.f; i++){
	175	+// std::cout<<"\t"<<ga.P[g * ga.f + i];
	176	+// }
	177	+// std::cout<<std::endl;
	178	+// std::cout<<"Sb determinant is "<<cv::determinant(newSb)<<"\t Sw determinant is "<<cv::determinant(newSb)<<std::endl;
	179	+// std::cout<<"fisher ratio is "<<fisherRatio<<std::endl;
	180	+// std::cout<<"Score["<<g<<"]: "<< gnom.S[g]<<std::endl;
	181	+// std::cout<<"------------------------------------------------"<<std::endl;
	182	+//
	183	+// }
	184	+
	185	+
	186	+ if(IPIV!= NULL) std::free(IPIV);
	187	+ if(gSw_a!= NULL) std::free(gSw_a);
	188	+ if(gSbSw_a!= NULL) std::free(gSbSw_a);
	189	+ if(Eigenvalues_a!= NULL) std::free(Eigenvalues_a);
	190	+ if(EigenvaluesI_a!= NULL) std::free(EigenvaluesI_a);
	191	+ if(tempgSb!= NULL) std::free(tempgSb);
	192	+ if(tempgSw!= NULL) std::free(tempgSw);
	193	+
	194	+}
	195	+
	196	+
	197	+void fitnessFunction( float* sb, float* sw, float* lda, float* M, float* cM, size_t f, cudaDeviceProp props, size_t gen, std::ofstream& profilefile){
	198	+
	199	+ size_t tP = 0; //total number of pixels
	200	+ std::for_each(nP.begin(), nP.end(), [&] (size_t n) {
	201	+ tP += n;
	202	+ });
	203	+ size_t nC = nP.size(); //total number of classes
	204	+
	205	+ //--------------Compute between class scatter
	206	+ // ga.cpu_computeSbSw(sb, sw, M, cM, nC, tP, nP, gen);
	207	+ //
	208	+ //if(debug){
	209	+ // std::cout<<"cpu results of Sb sn Sw"<<std::endl;
	210	+ // displayS(sb, ga.f); //display Sb
	211	+ // displayS(sw, ga.f); //display Sw
	212	+ // }
	213	+
	214	+ //ga.callingGpuComputeSbSw(sb, sw, nC, tP, nP, props, gen);
	215	+ ga.gpu_computeSbSw(sb, sw, nC, tP, props, gen, debug, profilefile);
	216	+ //ga.cpu_computeSbSw(sb, sw, M, cM, nC, tP, nP);
	217	+
	218	+ if(debug){
	219	+ std::cout<<"From fitness function: gpu results of Sb sn Sw"<<std::endl;
	220	+ displayS(sb, ga.f); //display Sb
	221	+ displayS(sw, ga.f); //display Sw
	222	+ std::cout<<std::endl;
	223	+ }
	224	+
	225	+ // ----------------------- Linear discriminant Analysis --------------------------------------
	226	+ //timer.start();
	227	+ //structure is created to pass variable to thread function as it accepts only 3 arguments
	228	+ gnom.S = ga.S;
	229	+ gnom.Sw = sw;
	230	+ gnom.Sb = sb;
	231	+ gnom.lda = lda;
	232	+
	233	+ //calling function without using threads
	234	+ for (size_t i = 0; i<ga.p; i++){
	235	+ //calling function for eigencomputation
	236	+ gpuComputeEignS(i, f);
	237	+ //std::cout<<"Score["<<i<<"]: "<< ga.S[i]<<std::endl;
	238	+ }
	239	+
	240	+ //std::vector<std::thread> threads;
	241	+ //for (size_t g = 0; g<ga.p; g++){
	242	+ // //creating thread to do eigen computation
	243	+ // threads.push_back(std::thread(gpuComputeEignS, g, f));
	244	+ //}
	245	+ //
	246	+ //// loop again to join the threads
	247	+ //for (auto& t : threads)
	248	+ // t.join();
	249	+
	250	+ const auto elapsed1 = timer.time_elapsed();
	251	+ if(gen > ga.gnrtn - 2){
	252	+ std::cout << "gpu_eigen time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
	253	+ profilefile<< "gpu_eigen time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
	254	+ }
	255	+ //ga.S = gnom.score;
	256	+ //size_t bestGnomeIdx = ga.sortSIndx()[0];
	257	+
	258	+}//end of fitness function
	259	+
	260	+void binaryclassifier(int classnum){
	261	+ unsigned int* target = (unsigned int*) calloc(tP, sizeof(unsigned int));
	262	+ memcpy(target, ga.T, tP * sizeof(unsigned int));
	263	+ for(int i = 0 ; i < tP; i++){
	264	+ if(target[i]==classnum){
	265	+ ga.T[i] = 1;
	266	+
	267	+ }else
	268	+ ga.T[i] = 0;
	269	+ }
	270	+}
	271	+
	272	+
	273	+
	274	+void advertisement() {
	275	+ std::cout << std::endl;
	276	+ std::cout << "=========================================================================" << std::endl;
	277	+ std::cout << "Thank you for using the GA-GPU features selection for spectroscopic image!" << std::endl;
	278	+ std::cout << "=========================================================================" << std::endl << std::endl;
	279	+// std::cout << args.str();
	280	+}
	281	+
	282	+int main(int argc, char* argv[]){
	283	+
	284	+//Add the argument options and set some of the default parameters
	285	+ args.add("help", "print this help");
	286	+ args.section("Genetic Algorithm");
	287	+ args.add("features", "select features selection algorithm parameters","10", "number of features to be selected");
	288	+ args.add("classes", "image masks used to specify classes", "", "class1.bmp class2.bmp class3.bmp");
	289	+ args.add("population", "total number of feature subsets in puplation matrix", "1000");
	290	+ args.add("generations", "number of generationsr", "50");
	291	+ args.add("initial_guess", "initial guess of featues", "");
	292	+ args.add("debug", "display intermediate data for debugging");
	293	+ args.add("binary", "Select features for binary classes", "");
	294	+ args.add("trim", "this gives wavenumber to use in trim option of siproc which trims all bands from envi file except gagpu selected bands");
	295	+
	296	+ args.parse(argc,argv); //parse the command line arguments
	297	+
	298	+//Print the help text if set
	299	+ if(args["help"].is_set()){ //display the help text if requested
	300	+ advertisement();
	301	+ std::cout<<std::endl<<"usage: ga-gpu input output --option [A B C ...]"<<std::endl;
	302	+ std::cout<<std::endl<<std::endl;
	303	+ std::cout<<args.str()<<std::endl;
	304	+ exit(1);
	305	+ }
	306	+ if (args.nargs() < 2) { //if the user doesn't provide input and output files
	307	+ std::cout << "ERROR: GA-GPU requires an input (ENVI) file and an output (features, text) file." << std::endl;
	308	+ return 1;
	309	+ }
	310	+ if (args["classes"].nargs() < 2) { //if the user doesn't specify at least two class images
	311	+ std::cout << "ERROR: GA-GPU requires at least two class images to be specified using the --classes option" << std::endl;
	312	+ return 1;
	313	+ }
	314	+
	315	+ std::string outfile = args.arg(1); //outfile is text file where bnad index, LDA-basis, wavelength and if --trim option is set then trim wavelengths are set respectively
	316	+ std::string profile_file = "profile_" + outfile ;
	317	+ std::ofstream profilefile(profile_file.c_str(), std::ios::out); //open outfstream for outfile
	318	+
	319	+ time_t t_start = time(NULL); //start a timer for file reading
	320	+ E.open(args.arg(0), std::string(args.arg(0)) + ".hdr"); //open header file
	321	+ size_t X = E.header.samples; //total number of pixels in X dimension
	322	+ size_t Y = E.header.lines; //total number of pixels in Y dimension
	323	+ B = (unsigned int)E.header.bands; //total number of bands (features)
	324	+ std::vector<double> wavelengths = E.header.wavelength; //wavelengths of each band
	325	+
	326	+ if(E.header.interleave != stim::envi_header::BIP){ //this code can only load bip files and hence check that in header file
	327	+ std::cout<<"this code works for only bip files. please convert file to bip file"<<std::endl;
	328	+ exit(1); //if file is not bip file exit code execution
	329	+ }
	330	+
	331	+///--------------------------Load features---------------------------------------------
	332	+ nP = ga_load_class_images(argc, args, &nC, &tP); //load supervised class images
	333	+ ga.F = load_features( nC, tP, B, E, nP); //generate the feature matrix
	334	+ ga.T = ga_load_responses(tP, nC, nP); //load the responses for RF training
	335	+ E.close(); //close the hyperspectral file
	336	+ time_t t_end = time(NULL);
	337	+ std::cout<<"Total time: "<<t_end - t_start<<" s"<<std::endl;
	338	+ //display_PixelfeatureNclass(ga.F, ga.T , B , 1); //Print one value from feature matrix to debug feature loading
	339	+ //std::cout << "pixel target is " << ga.T[0] << " " << ga.T[1] << " " << ga.T[tP - 2] << " " << ga.T[tP - 1]<<std::endl;
	340	+///--------------------------Genetic algorith configurations with defult paramets and from argument values---------------------
	341	+ ga.f = args["features"].as_int(0); //number of features to be selected by user default value is 10
	342	+ ga.p = args["population"].as_int(0); //population size to be selected by user default value is 1000
	343	+ ga.gnrtn = args["generations"].as_int(0); //number of generations to be selected by user default value is 50
	344	+ if(args["binary"]) { //set this option when features are to be selected as binary clas features (class vs stroma)
	345	+ binClassOne = args["binary"].as_int(0); //sel class number here, if 2 then features are selected for (class-2 vs stroma)
	346	+ //feture selection for class selected by user with user arguments (make it binary class data by making chosen class label as 1 and al other class labels 0 from multiclass data )
	347	+ //to select feature for all classes in joint class data using binary class system need to write a script with loop covering all classes
	348	+ binaryclassifier(binClassOne);
	349	+ } ///not fully implemented yet
	350	+
	351	+ ga.ub = B; //upper bound is number of bands (i.e. size of z dimension) Note: for this particular application and way code is written lower bound is 0 and upper bound is size of z dimension
	352	+ ga.uniformRate = 0.5; //uniform rate is used in crossover
	353	+ ga.mutationRate = 0.5f; //in percentage for mutation operation on gnome
	354	+ ga.tournamentSize = 5; //for crossover best parents are selected from tournament of gnomes
	355	+ ga.elitism = true; // if it is true then best gnome of current generation is passed to next generation
	356	+ //initial guess of population
	357	+ ga.i_guess = (unsigned int*) calloc(ga.f, sizeof(unsigned int));
	358	+ debug = args["debug"];
	359	+
	360	+//==================Generate intial population =================================================
	361	+ std::vector<unsigned int> i_guess(ga.f);
	362	+ for (size_t b = 0; b < ga.f; b++) //generate default initial guess
	363	+ i_guess[b] = rand() % B + 0;
	364	+
	365	+ if (args["initial_guess"].is_set()) {//if the user specifies the --initialguess option & provides feature indices as initial guess
	366	+ size_t nf = args["initial_guess"].nargs(); //get the number of arguments after initial_guess
	367	+ if (nf == 1 \|\| nf == ga.f) { //check if file with initial guessed indices is given or direct indices are given as argument
	368	+ if (nf == 1) { //if initial guessed feature indices are given in file
	369	+ std::ifstream in; //open the file containing the baseline points
	370	+ in.open(args["initial_guess"].as_string().c_str());
	371	+ if (in.is_open()){ //if file is present and can be opened then read it
	372	+ unsigned int b_ind;
	373	+ while (in >> b_ind) //get each band index and push it into the vector
	374	+ i_guess.push_back(b_ind);
	375	+ }
	376	+ else
	377	+ std::cout << "cannot open file of initial_guess indices" << std::endl;
	378	+ }
	379	+ else if (nf == ga.f) { //if direct indices are given as argument
	380	+ for (size_t b = 0; b < nf; b++) //for each band given by the user
	381	+ i_guess[b] = args["initial_guess"].as_int(b); //store that band in the i_guess array
	382	+ }
	383	+ }
	384	+ }
	385	+
	386	+ ga.initialize_population(i_guess, debug); //initialize first population set for first generation, user can pass manually preferred features from command line
	387	+ //display_gnome(0);
	388	+
	389	+//------------------Calculate class means and total mean of features----------------------------
	390	+ float* M = (float*)calloc( B , sizeof(float)); //total mean of entire feature martix for all features (bands B)
	391	+ ga.ttlMean(M, tP, B); //calculate total mean, ga.F is entire feature matrix, M is mean for all bands B(features)
	392	+ if(debug) ga.dispalymean(M); //if option --debug is used display all bands mean
	393	+
	394	+ //display band index of bands with mean zero, this indicates that band has all zero values
	395	+ std::cout<<"Display features indices with zero mean "<<std::endl;
	396	+ for(unsigned int i = 0; i < B; i++){
	397	+ if(M[i]== 0)
	398	+ std::cout<<"\t"<<i;
	399	+ }
	400	+ std::cout<<std::endl;
	401	+// std::cout << "pixel target is " << ga.T[0] << " " << ga.T[1] << " " << ga.T[tP - 2] << " " << ga.T[tP - 1]<<std::endl;
	402	+ float* cM = (float)calloc(nC B , sizeof(float)); //cM is nC X B matrix with each row as mean of all samples in one class for all features (bands B)
	403	+ ga.classMean(cM, tP, nC, B, nP); //calculate class mean, ga.F is entire feature matrix, M is mean for all bands B(features)
	404	+ if(debug) ga.dispalyClassmean(cM, nC);
	405	+
	406	+//------------------------------------GPU init----------------------------------------------------
	407	+ //checking for cuda device
	408	+ int count;
	409	+ HANDLE_ERROR(cudaGetDeviceCount(&count));
	410	+ if(count < 1){
	411	+ std::cout<<"no cuda device is available"<<std::endl;
	412	+ return 1;
	413	+ }
	414	+ cudaDeviceProp props;
	415	+ HANDLE_ERROR(cudaGetDeviceProperties(&props, 0));
	416	+ ga.gpuInitializationfrommain(M, cM, nP, tP, nC);
	417	+
	418	+//feture selection for class selected by user with user arguments (make it binary class data by making chosen class label as 1 and al other class labels 0 from multiclass data )
	419	+//to select feature for all classes in joint class data using binary class system need to write a script with loop covering all classes
	420	+ //if(binaryClass){
	421	+ // binaryclassifier(binClassOne);
	422	+ //}
	423	+
	424	+//============================= GA evolution by generations ====================================================
	425	+ std::vector<unsigned int> bestgnome; //holds best gnome after each generation evaluation
	426	+ size_t bestG_Indx; //This gives index of best gnome in the current population to get best gnome and its fitness value
	427	+ unsigned int* newPop = (unsigned int) calloc(ga.p ga.f, sizeof(unsigned int)); //temprory storage of new population
	428	+ double* best_S = (double*) calloc (ga.gnrtn, sizeof(double)); //stores fitness value of best gnome at each iteration
	429	+ float* lda = (float) calloc (ga.p (nC-1) * ga.f, sizeof(float)); //stores LDA basis for each gnome so that we can have best gnome's LDA basis
	430	+ float* sb = (float) calloc( ga.p ga.f * ga.f , sizeof(float)) ; //3d matrix for between class scatter (each 2d matrix between class scatter for one gnome)
	431	+ float* sw = (float) calloc( ga.p ga.f * ga.f , sizeof(float)) ; //3d matrix for within class scatter (each 2d matrix within class scatter for one gnome)
	432	+ ga.zerobandcheck(M, true); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean
	433	+ ga.zerobandcheck(M, true); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results
	434	+ time_t gpu_timestart = time(NULL); //start a timer for total evoluation
	435	+
	436	+ for (size_t gen = 0; gen < ga.gnrtn; gen++){ //for each generation find fitness value of all gnomes in population matrix and generate population for next generation
	437	+ //std::cout<<"Generation: "<<gen<<std::endl;
	438	+ fitnessFunction(sb, sw, lda, M , cM, ga.f, props, gen, profilefile); //Evaluate phe(feature matrix for current population) for fitness of all gnomes in current population
	439	+ timer.start(); //start timer for new population generation
	440	+ bestG_Indx = ga.evolvePopulation(newPop, M, debug); //evolve population to generate new generation population
	441	+ const auto pop_generation = timer.time_elapsed(); // end timer for new population generation
	442	+ if(gen >ga.gnrtn -2){
	443	+ std::cout << "population evolution time "<<std::chrono::duration_cast<std::chrono::microseconds>(pop_generation).count() << "us" << std::endl;
	444	+ profilefile<<"population evolution time "<<std::chrono::duration_cast<std::chrono::microseconds>(pop_generation).count() << "us" <<std::endl;
	445	+ }
	446	+
	447	+ best_S[gen] = ga.S[bestG_Indx]; //score of best gnome in current generation
	448	+ bestgnome = ga.getGnome(bestG_Indx); //Best gnome of current populaation
	449	+ ga.generateNewP(newPop); //replace current population with new populaiton in the ga classs object
	450	+ ga.zerobandcheck(M, false); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean
	451	+ ga.zerobandcheck(M, false); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results
	452	+ }//end generation
	453	+
	454	+ time_t gpu_timeend = time(NULL); //end a timer for total evoluation
	455	+ std::cout<<"Total gpu time: "<<gpu_timeend - gpu_timestart<<" s"<<std::endl;
	456	+ profilefile<<"Total gpu time: "<<gpu_timeend - gpu_timestart<<" s"<<std::endl;
	457	+
	458	+//================================ Results of GA ===============================================================
	459	+ std::cout<<"best gnome's fitness value is "<<best_S[ga.gnrtn-1]<<std::endl;
	460	+ std::cout<<"best gnome is: ";
	461	+ for(size_t i = 0; i < ga.f; i++){
	462	+ std::cout<<" "<<(bestgnome.at(i));
	463	+ }
	464	+ std::cout<<std::endl;
	465	+
	466	+ //create a text file to store the LDA stats (features subset and LDA-basis)
	467	+ ////format of CSV file is: 1st row - band index, 2nd LDA basis depending on number of classes, 3rd - wavenumber corresponding to band index and it --trim is selected then trim wavnumbersare also given
	468	+ std::ofstream csv(outfile.c_str(), std::ios::out); //open outfstream for outfile
	469	+ size_t ldaindx = bestG_Indx * (nC-1) * ga.f ; //Compute LDA basis index of best gnome
	470	+
	471	+ //if(binaryClass){ //this option is for binary class feature selection from joint classes but not fully implemented
	472	+ // csv<<binClassOne<<std::endl;
	473	+ //}
	474	+
	475	+ //fitness values of best gnome is
	476	+ csv<<"best gnome's fitness value is "<<best_S[ga.gnrtn-1]<<std::endl; //output fitness value of best gnome in last generation
	477	+ //output gnome i.e. band index of selected featurs
	478	+ csv<<(bestgnome.at(0)); //output feature subset
	479	+ for(size_t i = 1; i < ga.f; i++)
	480	+ csv<<","<<(bestgnome.at(i));
	481	+ csv<<std::endl;
	482	+
	483	+ //output LDA basis of size r X f, r is nC - 1 as LDA projection is rank limited by number of classes - 1
	484	+ for (size_t i = 0; i < nC-1; i++){
	485	+ csv<<lda[ldaindx + i * ga.f ];
	486	+ for (size_t j = 1; j < ga.f; j++){
	487	+ csv<<","<<lda[ldaindx + i * ga.f +j];
	488	+ }
	489	+ csv << std::endl;
	490	+ }
	491	+ //output actual wavelenths corresponding to those band indices
	492	+ csv << (wavelengths[bestgnome.at(0)]);
	493	+ for (size_t i = 1; i < ga.f; i++)
	494	+ csv << "," << (wavelengths[bestgnome.at(i)]);
	495	+ csv << std::endl;
	496	+
	497	+
	498	+ if (args["trim"].is_set()) {
	499	+ csv << "trim info" << std::endl;
	500	+ std::sort(bestgnome.begin(), bestgnome.end()); //sort features index in best gnome
	501	+
	502	+ std::vector<unsigned int> trimindex(ga.f); //create a vector to store temprory trim index bounds
	503	+ std::vector<unsigned int> finaltrim_ind; //create a vector to store final trim index bounds
	504	+ std::vector<unsigned int> trim_wv; //create a vector to store final trim wavelength bounds
	505	+
	506	+ //trim index
	507	+ trimindex.push_back(1); //1st trimming band index is 1
	508	+ for (size_t i = 0; i < ga.f; i++) { // for each feature find its bound indexes
	509	+ trimindex[i * 2] = bestgnome.at(i) - 1;
	510	+ trimindex[i * 2 + 1] = bestgnome.at(i) + 1;
	511	+ }
	512	+ trimindex.push_back(B); //last bound index is B
	513	+
	514	+ //organize trim index
	515	+ int k = 0;
	516	+ for (size_t i = 0; i < ga.f + 1; i++) { // find valid pair of trim indices bound excluding adjacent trim indices
	517	+ if (trimindex[2 * i] < trimindex[2 * i + 1]) {
	518	+ finaltrim_ind.push_back(trimindex[2 * i]); //this is left bound
	519	+ finaltrim_ind.push_back(trimindex[2 * i + 1]);
	520	+ k = k + 2;
	521	+ }
	522	+ }
	523	+ //add duplicated trim indices as single index to final trim index
	524	+ for (size_t i = 0; i < ga.f + 1; i++) { //check each pair of trim indices for duplications
	525	+ if (trimindex[2 * i] == trimindex[2 * i + 1]) { // (duplication caused due to adjacent features)
	526	+ finaltrim_ind.push_back(trimindex[2 * i]); // remove duplicated trim indices replace by one
	527	+ k = k + 1;
	528	+ }
	529	+ }
	530	+
	531	+
	532	+ ////output actual wavelenths corresponding to those trim indices
	533	+ ////these wavenumber are grouped in pairs, check each pair, if duplicated numbers are there in pair delete one and keep other as band to trim, if 2nd wavnumber is smaller than 1st in a pair ignore that pair
	534	+ ////e.g [1, 228, 230, 230, 232, 350,352, 351, 353, 1200] pairas [1(start)-228,230-230, 232-350, 352-351, 353-1200(end)], trimming wavenumbers are [1-228, 230, 233-350, 353-1200]
	535	+ csv << (wavelengths[finaltrim_ind.at(0)]);
	536	+ for (size_t i = 1; i < ga.f * + 2 ; i++)
	537	+ csv << "," << (wavelengths[finaltrim_ind.at(i)]);
	538	+ csv << std::endl;
	539	+ } //end trim option
	540	+
	541	+
	542	+ //free gpu pointers
	543	+ ga.gpu_Destroy();
	544	+
	545	+ //free pointers
	546	+ std::free(sb);
	547	+ std::free(sw);
	548	+ std::free(M);
	549	+ std::free(cM);
	550	+ std::free(best_S);
	551	+ std::free(lda);
	552	+ std::free(newPop);
	553	+
	554	+}//end main
	555	+
	556	+
...	...

src/timer.h 0 → 100644

Wrap text Show/Hide comments View file @0d84034

	1	+++ a/src/timer.h
	1	+#ifndef GA_GPU_TIMER_H
	2	+#define GA_GPU_TIMER_H
	3	+
	4	+#include <chrono>
	5	+//#include <thread>
	6	+//#include <iostream>
	7	+
	8	+//using namespace std::chrono;
	9	+
	10	+class Timer {
	11	+typedef std::chrono::high_resolution_clock Clock;
	12	+
	13	+Clock::time_point epoch;
	14	+public:
	15	+ void start(){
	16	+ epoch = Clock::now();
	17	+ }
	18	+ Clock::duration time_elapsed() const{
	19	+ return Clock::now() - epoch;
	20	+ }
	21	+};
	22	+
	23	+#endif
	24	+
	25	+
	26	+//class Timer {
	27	+// std::chrono::time_point<std::chrono::high_resolution_clock> epoch;
	28	+//
	29	+// public:
	30	+// typedef high_resolution_clock Clock;
	31	+// void start(){
	32	+// epoch = Clock::now();
	33	+// }
	34	+// std::chrono::time_point<std::chrono::high_resolution_clock> time_elapsed() const {
	35	+// return Clock::now() - epoch;
	36	+// }
	37	+//};
0	38	\ No newline at end of file
...	...