release / genetic-gpu

Browse Code »

Commit 0d84034253937b68843ec545a0050dd6abaccf5c

Authored by David Mayerich 2019-12-16 13:50:34 -0600

0 parents

public release commit

Showing 11 changed files with 2164 additions and 0 deletions Show diff stats

Inline Side-by-side

CMakeLists.txt 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/CMakeLists.txt
		1	+#Specify the version being used as well as the language
		2	+cmake_minimum_required(VERSION 2.8)
		3	+
		4	+#Name your project here
		5	+project(ga-gpu)
		6	+
		7	+#add the source root to the module search path so find_package() can see modules stored there
		8	+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
		9	+
		10	+#default to release mode when the user did not pick a build type
		11	+if(NOT CMAKE_BUILD_TYPE)
		12	+ set(CMAKE_BUILD_TYPE Release)
		13	+endif(NOT CMAKE_BUILD_TYPE)
		14	+
		15	+#build the executable in the binary directory on MS Visual Studio
		16	+if ( MSVC )
		17	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") #NOTE(review): OUTPUT_DIRECTORY is not set in this file - confirm the caller supplies it
		18	+ SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
		19	+endif ( MSVC )
		20	+#MAYBE REMOVE-----------------
		21	+#set C++11 flags if using GCC
		22	+if( CMAKE_COMPILER_IS_GNUCC )
		23	+# SET( CMAKE_CXX_FLAGS "-std=c++11")
		24	+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_FORCE_INLINES") #append so flags supplied by the user or toolchain are kept
		25	+# SET( CUDA_NVCC_FLAGS "-std=c++11")
		26	+endif( CMAKE_COMPILER_IS_GNUCC )
		27	+
		28	+SET( CUDA_NVCC_FLAGS "--gpu-architecture=compute_50 --gpu-code=sm_50,compute_50")
		29	+#-----------------------------
		30	+
		31	+
		32	+
		33	+#find packages-----------------------------------
		34	+#find OpenCV (required) and define USING_OPENCV for every target in this directory
		35	+find_package(OpenCV REQUIRED)
		36	+add_definitions(-DUSING_OPENCV)
		37	+
		38	+#find the pthreads package (optional: not marked REQUIRED)
		39	+find_package(Threads)
		40	+
		41	+#find the X11 package (optional: not marked REQUIRED)
		42	+find_package(X11)
		43	+
		44	+#find the STIM library (optional: not marked REQUIRED)
		45	+find_package(STIM)
		46	+
		47	+#find CUDA, mostly for LA stuff using cuBLAS
		48	+find_package(CUDA REQUIRED)
		49	+
		50	+#find Boost for Unix-based file lists; only searched for, and only mandatory, when the C compiler is GCC
		51	+if( CMAKE_COMPILER_IS_GNUCC )
		52	+ find_package(Boost COMPONENTS filesystem system)
		53	+ if(Boost_FOUND)
		54	+ include_directories(${Boost_INCLUDE_DIR})
		55	+ else()
		56	+ message(FATAL_ERROR "HSIproc requires Boost::filesystem and Boost::system when using GCC")
		57	+ endif()
		58	+endif()
		59	+
		60	+#find FANN
		61	+#find_package(FANN REQUIRED)
		62	+
		63	+#find the GLUT library for visualization
		64	+#find_package(OpenGL REQUIRED)
		65	+#find_package(GLUT REQUIRED)
		66	+#if(WIN32)
		67	+# find_package(GLEW REQUIRED)
		68	+# include_directories(${GLEW_INCLUDE_DIR})
		69	+#endif(WIN32)
		70	+
		71	+#find LAPACKE (required) and supporting link libraries
		72	+find_package(LAPACKE REQUIRED)
		73	+
		74	+#add the include directories of the packages above plus the project's own src directory
		75	+include_directories(${CUDA_INCLUDE_DIRS}
		76	+ ${OpenCV_INCLUDE_DIRS}
		77	+ ${LAPACKE_INCLUDE_DIR}
		78	+ ${STIM_INCLUDE_DIRS}
		79	+ ${OpenGL_INCLUDE_DIRS} #the OpenGL find_package call above is commented out, so nothing in this file sets this
		80	+# ${GLUT_INCLUDE_DIR}
		81	+ ${FANN_INCLUDE_DIRS} #the FANN find_package call above is commented out, so nothing in this file sets this
		82	+ "${CMAKE_SOURCE_DIR}/src"
		83	+)
		84	+
		85	+#Assign a variable for all of the header files in this project
		86	+include_directories("${CMAKE_SOURCE_DIR}/src")
		87	+#file(GLOB GACPU_H "${CMAKE_SOURCE_DIR}/src/gacpu/*.h")
		88	+file(GLOB GAGPU_H "${CMAKE_SOURCE_DIR}/src/*.h")
		89	+#file(GLOB GA_H "${CMAKE_SOURCE_DIR}/src/*.h")
		90	+
		91	+#Assign source files to the appropriate variables to easily associate them with executables
		92	+#file(GLOB GA_CPU_SRC "${CMAKE_SOURCE_DIR}/src/gacpu/*.cpp")
		93	+file(GLOB GA_GPU_SRC "${CMAKE_SOURCE_DIR}/src/*.c*") #wildcard restored: matches .c, .cpp and .cu files in src
		94	+
		95	+
		96	+#create the ga-gpu executable from the globbed headers and sources (compiled through FindCUDA's cuda_add_executable)
		97	+cuda_add_executable(ga-gpu
		98	+ ${GAGPU_H}
		99	+# ${GA_H}
		100	+ ${GA_GPU_SRC}
		101	+)
		102	+target_link_libraries(ga-gpu ${CUDA_LIBRARIES}
		103	+ ${CUDA_CUBLAS_LIBRARIES}
		104	+ ${CUDA_CUFFT_LIBRARIES}
		105	+ ${LAPACKE_LIBRARIES}
		106	+ ${LAPACK_LIBRARIES}
		107	+ ${BLAS_LIBRARIES}
		108	+ ${CMAKE_THREAD_LIBS_INIT}
		109	+ ${X11_LIBRARIES}
		110	+ ${OpenCV_LIBS}
		111	+)
		112	+
		113	+
		114	+#create the PROC executable----------------------------------------------
		115	+
		116	+#create an executable file
		117	+#add_executable(hsiga
		118	+# ${GACPU_H}
		119	+# ${GA_H}
		120	+# ${GA_CPU_SRC}
		121	+#)
		122	+#target_link_libraries(hsiga ${LAPACKE_LIBRARIES}
		123	+# ${LAPACK_LIBRARIES}
		124	+# ${BLAS_LIBRARIES}
		125	+# ${CMAKE_THREAD_LIBS_INIT}
		126	+# ${X11_LIBRARIES}
		127	+# ${OpenCV_LIBS}
		128	+#)
		129	+
		130	+
		131	+
		132	+#if Boost::filesystem was found, link it (and Boost::system) into the ga-gpu executable
		133	+if(Boost_FILESYSTEM_FOUND)
		134	+# if(BUILD_GACPU)
		135	+# target_link_libraries(hsiga ${Boost_FILESYSTEM_LIBRARIES}
		136	+# ${Boost_SYSTEM_LIBRARY}
		137	+# )
		138	+ #message(${Boost_FILESYSTEM_LIBRARIES})
		139	+# endif(BUILD_GACPU)
		140	+# if(BUILD_GAGPU)
		141	+ target_link_libraries(ga-gpu ${Boost_FILESYSTEM_LIBRARY} #FindBoost publishes Boost_<COMPONENT>_LIBRARY (singular) per component
		142	+ ${Boost_SYSTEM_LIBRARY}
		143	+ )
		144	+# endif(BUILD_GAGPU)
		145	+endif(Boost_FILESYSTEM_FOUND)

FindGLEW.cmake 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/FindGLEW.cmake
		1	+# Copyright (c) 2012-2016 DreamWorks Animation LLC
		2	+#
		3	+# All rights reserved. This software is distributed under the
		4	+# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
		5	+#
		6	+# Redistributions of source code must retain the above copyright
		7	+# and license notice and the following restrictions and disclaimer.
		8	+#
		9	+# * Neither the name of DreamWorks Animation nor the names of
		10	+# its contributors may be used to endorse or promote products derived
		11	+# from this software without specific prior written permission.
		12	+#
		13	+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
		14	+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
		15	+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
		16	+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
		17	+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL,
		18	+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
		19	+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
		20	+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
		21	+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
		22	+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
		23	+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
		24	+# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE
		25	+# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00.
		26	+#
		27	+
		28	+#--cmake--
		29	+# - Find GLEW
		30	+#
		31	+# Author : Nicholas Yue yue.nicholas@gmail.com
		32	+#
		33	+# This auxiliary CMake file helps in find the GLEW headers and libraries
		34	+#
		35	+# GLEW_FOUND set if Glew is found.
		36	+# GLEW_INCLUDE_DIR GLEW's include directory
		37	+# GLEW_GLEW_LIBRARY GLEW libraries
		38	+# GLEW_GLEWmx_LIBRARY GLEWmx libraries (Multiple Rendering Context)
		39	+
		40	+INCLUDE ( FindPackageHandleStandardArgs ) # load the FIND_PACKAGE_HANDLE_STANDARD_ARGS helper directly rather than through FIND_PACKAGE
		41	+
		42	+FIND_PATH( GLEW_LOCATION include/GL/glew.h # GLEW_LOCATION becomes the directory that contains include/GL/glew.h
		43	+ "$ENV{GLEW_ROOT}" # only the GLEW_ROOT environment variable is searched
		44	+ NO_DEFAULT_PATH
		45	+ NO_SYSTEM_ENVIRONMENT_PATH
		46	+ )
		47	+
		48	+FIND_PACKAGE_HANDLE_STANDARD_ARGS ( GLEW # sets GLEW_FOUND from GLEW_LOCATION alone; the libraries are not checked here
		49	+ REQUIRED_VARS GLEW_LOCATION
		50	+ )
		51	+
		52	+IF ( GLEW_LOCATION ) # everything below runs only when the header search succeeded
		53	+
		54	+ SET( GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path")
		55	+
		56	+ SET ( ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) # saved so the suffix list can be restored after the searches
		57	+ IF (GLEW_USE_STATIC_LIBS) # static branch: restrict the suffixes to static archives / link libraries
		58	+ IF (APPLE)
		59	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
		60	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
		61	+ NO_DEFAULT_PATH
		62	+ NO_SYSTEM_ENVIRONMENT_PATH
		63	+ )
		64	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
		65	+ NO_DEFAULT_PATH
		66	+ NO_SYSTEM_ENVIRONMENT_PATH
		67	+ )
		68	+ # MESSAGE ( "APPLE STATIC" )
		69	+ # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
		70	+ ELSEIF (WIN32)
		71	+ # Link library
		72	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
		73	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib )
		74	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib )
		75	+ ELSE (APPLE)
		76	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
		77	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
		78	+ NO_DEFAULT_PATH
		79	+ NO_SYSTEM_ENVIRONMENT_PATH
		80	+ )
		81	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
		82	+ NO_DEFAULT_PATH
		83	+ NO_SYSTEM_ENVIRONMENT_PATH
		84	+ )
		85	+ # MESSAGE ( "LINUX STATIC" )
		86	+ # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
		87	+ ENDIF (APPLE)
		88	+ ELSE () # shared-library branch
		89	+ IF (APPLE)
		90	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
		91	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib )
		92	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib )
		93	+ ELSEIF (WIN32)
		94	+ # Link library
		95	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
		96	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib )
		97	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib )
		98	+ # Load library
		99	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
		100	+ FIND_LIBRARY ( GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin
		101	+ NO_DEFAULT_PATH
		102	+ NO_SYSTEM_ENVIRONMENT_PATH
		103	+ )
		104	+ FIND_LIBRARY ( GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin
		105	+ NO_DEFAULT_PATH
		106	+ NO_SYSTEM_ENVIRONMENT_PATH
		107	+ )
		108	+ ELSE (APPLE)
		109	+ # Unices (the suffix list is left at its saved value in this branch)
		110	+ FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
		111	+ NO_DEFAULT_PATH
		112	+ NO_SYSTEM_ENVIRONMENT_PATH
		113	+ )
		114	+ FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
		115	+ NO_DEFAULT_PATH
		116	+ NO_SYSTEM_ENVIRONMENT_PATH
		117	+ )
		118	+ ENDIF (APPLE)
		119	+ ENDIF ()
		120	+ # MUST reset
		121	+ SET(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})
		122	+
		123	+ SET( GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library") # published result names are GLEW_GLEW_LIBRARY / GLEW_GLEWmx_LIBRARY
		124	+ SET( GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library")
		125	+
		126	+ENDIF ()

FindGLUT.cmake 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/FindGLUT.cmake
		1	+#.rst:
		2	+# FindGLUT
		3	+# --------
		4	+#
		5	+# try to find glut library and include files.
		6	+#
		7	+# IMPORTED Targets
		8	+# ^^^^^^^^^^^^^^^^
		9	+#
		10	+# This module defines the :prop_tgt:`IMPORTED` targets:
		11	+#
		12	+# ``GLUT::GLUT``
		13	+# Defined if the system has GLUT.
		14	+#
		15	+# Result Variables
		16	+# ^^^^^^^^^^^^^^^^
		17	+#
		18	+# This module sets the following variables:
		19	+#
		20	+# ::
		21	+#
		22	+# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc.
		23	+# GLUT_LIBRARIES, the libraries to link against
		24	+# GLUT_FOUND, If false, do not try to use GLUT.
		25	+#
		26	+# Also defined, but not for general use are:
		27	+#
		28	+# ::
		29	+#
		30	+# GLUT_glut_LIBRARY = the full path to the glut library.
		31	+# GLUT_Xmu_LIBRARY = the full path to the Xmu library.
		32	+# GLUT_Xi_LIBRARY = the full path to the Xi Library.
		33	+
		34	+#=============================================================================
		35	+# Copyright 2001-2009 Kitware, Inc.
		36	+#
		37	+# Distributed under the OSI-approved BSD License (the "License");
		38	+# see accompanying file Copyright.txt for details.
		39	+#
		40	+# This software is distributed WITHOUT ANY WARRANTY; without even the
		41	+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
		42	+# See the License for more information.
		43	+#=============================================================================
		44	+# (To distribute this file outside of CMake, substitute the full
		45	+# License text for the above reference.)
		46	+
		47	+if (WIN32) # Windows: look only under the GLUT_ROOT_PATH environment variable (plus OPENGL_LIBRARY_DIR for 32-bit)
		48	+ find_path( GLUT_INCLUDE_DIR NAMES GL/glut.h
		49	+ PATHS $ENV{GLUT_ROOT_PATH}/include )
		50	+
		51	+ if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) # 64-bit build: only freeglut from lib/x64 is accepted
		52	+ find_library( GLUT_glut_LIBRARY NAMES freeglut
		53	+ PATHS
		54	+ $ENV{GLUT_ROOT_PATH}/lib/x64
		55	+
		56	+ NO_DEFAULT_PATH
		57	+ )
		58	+ else( CMAKE_SIZEOF_VOID_P EQUAL 8 )
		59	+ find_library( GLUT_glut_LIBRARY NAMES glut glut32 freeglut
		60	+ PATHS
		61	+ ${OPENGL_LIBRARY_DIR}
		62	+ $ENV{GLUT_ROOT_PATH}/lib
		63	+ )
		64	+ endif( CMAKE_SIZEOF_VOID_P EQUAL 8 )
		65	+
		66	+else ()
		67	+
		68	+ if (APPLE) # macOS: GLUT and Cocoa are looked up by name
		69	+ find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
		70	+ find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
		71	+ find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")
		72	+
		73	+ if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
		74	+ add_library(GLUT::Cocoa UNKNOWN IMPORTED)
		75	+ # Cocoa should always be a Framework, but we check to make sure.
		76	+ if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
		77	+ set_target_properties(GLUT::Cocoa PROPERTIES
		78	+ IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
		79	+ else()
		80	+ set_target_properties(GLUT::Cocoa PROPERTIES
		81	+ IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}")
		82	+ endif()
		83	+ endif()
		84	+ else ()
		85	+
		86	+ if (BEOS) # BeOS: only records extra search directories used by the find_path/find_library calls below
		87	+
		88	+ set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
		89	+ set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
		90	+
		91	+ else() # other Unix: also locate Xi and Xmu and wrap each one found in an imported target
		92	+
		93	+ find_library( GLUT_Xi_LIBRARY Xi
		94	+ /usr/openwin/lib
		95	+ )
		96	+
		97	+ find_library( GLUT_Xmu_LIBRARY Xmu
		98	+ /usr/openwin/lib
		99	+ )
		100	+
		101	+ if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi)
		102	+ add_library(GLUT::Xi UNKNOWN IMPORTED)
		103	+ set_target_properties(GLUT::Xi PROPERTIES
		104	+ IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}")
		105	+ endif()
		106	+
		107	+ if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu)
		108	+ add_library(GLUT::Xmu UNKNOWN IMPORTED)
		109	+ set_target_properties(GLUT::Xmu PROPERTIES
		110	+ IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}")
		111	+ endif()
		112	+
		113	+ endif ()
		114	+
		115	+ find_path( GLUT_INCLUDE_DIR GL/glut.h
		116	+ /usr/include/GL
		117	+ /usr/openwin/share/include
		118	+ /usr/openwin/include
		119	+ /opt/graphics/OpenGL/include
		120	+ /opt/graphics/OpenGL/contrib/libglut
		121	+ ${_GLUT_INC_DIR}
		122	+ )
		123	+
		124	+ find_library( GLUT_glut_LIBRARY glut
		125	+ /usr/openwin/lib
		126	+ ${_GLUT_glut_LIB_DIR}
		127	+ )
		128	+
		129	+ unset(_GLUT_INC_DIR) # drop the temporary BeOS hint variables
		130	+ unset(_GLUT_glut_LIB_DIR)
		131	+
		132	+ endif ()
		133	+
		134	+endif ()
		135	+include(FindPackageHandleStandardArgs) # defines FIND_PACKAGE_HANDLE_STANDARD_ARGS; this file did not load it anywhere
		136	+FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)
		137	+
		138	+if (GLUT_FOUND) # publish the result variables and the GLUT::GLUT imported target
		139	+ # Is -lXi and -lXmu required on all platforms that have it?
		140	+ # If not, we need some way to figure out what platform we are on.
		141	+ set( GLUT_LIBRARIES
		142	+ ${GLUT_glut_LIBRARY}
		143	+ ${GLUT_Xmu_LIBRARY}
		144	+ ${GLUT_Xi_LIBRARY}
		145	+ ${GLUT_cocoa_LIBRARY}
		146	+ )
		147	+
		148	+ if(NOT TARGET GLUT::GLUT) # create the imported target only once
		149	+ add_library(GLUT::GLUT UNKNOWN IMPORTED)
		150	+ set_target_properties(GLUT::GLUT PROPERTIES
		151	+ INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")
		152	+ if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$") # framework path: the location is the file named after the framework, inside it
		153	+ set_target_properties(GLUT::GLUT PROPERTIES
		154	+ IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
		155	+ else()
		156	+ set_target_properties(GLUT::GLUT PROPERTIES
		157	+ IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
		158	+ endif()
		159	+
		160	+ if(TARGET GLUT::Xmu) # chain whichever helper targets exist onto GLUT::GLUT
		161	+ set_property(TARGET GLUT::GLUT APPEND
		162	+ PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu)
		163	+ endif()
		164	+
		165	+ if(TARGET GLUT::Xi)
		166	+ set_property(TARGET GLUT::GLUT APPEND
		167	+ PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi)
		168	+ endif()
		169	+
		170	+ if(TARGET GLUT::Cocoa)
		171	+ set_property(TARGET GLUT::GLUT APPEND
		172	+ PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa)
		173	+ endif()
		174	+ endif()
		175	+
		176	+ #The following deprecated settings are for backwards compatibility with CMake1.4
		177	+ set (GLUT_LIBRARY ${GLUT_LIBRARIES})
		178	+ set (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
		179	+endif()
		180	+
		181	+mark_as_advanced( # GLUT_cocoa_LIBRARY is not in this list
		182	+ GLUT_INCLUDE_DIR
		183	+ GLUT_glut_LIBRARY
		184	+ GLUT_Xmu_LIBRARY
		185	+ GLUT_Xi_LIBRARY
		186	+ )

FindLAPACKE.cmake 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/FindLAPACKE.cmake
		1	+# - Try to find LAPACKE
		2	+#
		3	+# Once done this will define
		4	+# LAPACKE_FOUND - System has LAPACKE
		5	+# LAPACKE_INCLUDE_DIRS - The LAPACKE include directories
		6	+# LAPACKE_LIBRARIES - The libraries needed to use LAPACKE
		7	+# LAPACKE_DEFINITIONS - Compiler switches required for using LAPACKE
		8	+#
		9	+# Usually, LAPACKE requires LAPACK and the BLAS. This module does
		10	+# not enforce anything about that.
		11	+
		12	+find_path(LAPACKE_INCLUDE_DIR # directory containing lapacke.h
		13	+ NAMES lapacke.h
		14	+ PATHS $ENV{LAPACK_PATH} ${INCLUDE_INSTALL_DIR}
		15	+ PATHS ENV INCLUDE)
		16	+
		17	+find_library(LAPACKE_LIBRARY liblapacke lapacke
		18	+ PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
		19	+ PATHS ENV LIBRARY_PATH
		20	+ PATHS ENV LD_LIBRARY_PATH)
		21	+
		22	+if(MSVC) # on MSVC locate LAPACK and BLAS by hand next to LAPACKE
		23	+ find_library(LAPACK_LIBRARY liblapack lapack
		24	+ PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
		25	+ PATHS ENV LIBRARY_PATH
		26	+ PATHS ENV LD_LIBRARY_PATH)
		27	+
		28	+ find_library(BLAS_LIBRARY libblas blas
		29	+ PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
		30	+ PATHS ENV LIBRARY_PATH
		31	+ PATHS ENV LD_LIBRARY_PATH)
		32	+
		33	+else() # elsewhere use CMake's own FindLAPACK / FindBLAS, which set LAPACK_LIBRARIES / BLAS_LIBRARIES
		34	+ find_package(LAPACK REQUIRED) # was find_library(LAPACK REQUIRED), which searches for a library literally named "REQUIRED"
		35	+ find_package(BLAS REQUIRED)
		36	+endif()
		37	+set(LAPACKE_LIBRARIES ${LAPACKE_LIBRARY} ${LAPACK_LIBRARY} ${BLAS_LIBRARY})
		38	+set(LAPACKE_INCLUDE_DIRS ${LAPACKE_INCLUDE_DIR}) # the header of this module promises LAPACKE_INCLUDE_DIRS
		39	+include(FindPackageHandleStandardArgs)
		40	+find_package_handle_standard_args(LAPACKE DEFAULT_MSG
		41	+ LAPACKE_INCLUDE_DIR
		42	+ LAPACKE_LIBRARY) # test the LAPACKE library itself, not the aggregate list, so a -NOTFOUND entry cannot be masked
		43	+mark_as_advanced(LAPACKE_INCLUDE_DIR LAPACKE_LIBRARY)

FindSTIM.cmake 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/FindSTIM.cmake
		1	+# finds the STIM library through the STIMLIB_PATH environment variable (the automatic download below is disabled)
		2	+# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
		3	+
		4	+include(FindPackageHandleStandardArgs)
		5	+
		6	+set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH}) # empty when the environment variable is not set, which makes STIM "not found"
		7	+
		8	+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
		9	+
		10	+if(STIM_FOUND)
		11	+ set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
		12	+else() # was elseif(STIM_FOUND), which can never be true here, so the hint below was never printed
		13	+ #if the STIM library isn't found, tell the user how to provide it
		14	+ #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists
		15	+ #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
		16	+ #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
		17	+ #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
		18	+ message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
		19	+ message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
		20	+endif()
		21	+
		22	+find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)

src/basic_functions.h 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/src/basic_functions.h
		1	+#include <stdio.h>
		2	+
		3	+
		4	+size_t* sortIndx(float* input, size_t size){
		5	+ //returns a malloc'd array of the indices 0..size-1 ordered so that input[idx[0]] >= input[idx[1]] >= ... (descending); the caller must free() it
		6	+ size_t *idx = (size_t*) malloc (size * sizeof (size_t)); //one index slot per input element
		7	+ if (idx == NULL) return NULL; //allocation failed (or size == 0 on some platforms): nothing to sort
		8	+ for (size_t i = 0; i < size; i++)
		9	+ idx[i] = i;
		10	+
		11	+ for (size_t i=0; i<size; i++){
		12	+ for (size_t j=i+1; j<size; j++){
		13	+ if (input[idx[i]] < input[idx[j]]){ //a later element is larger, so move it forward
		14	+ std::swap (idx[i], idx[j]); //only the indices move; input itself is left untouched
		15	+ }
		16	+ }
		17	+ }
		18	+ return idx; //use as sortSIdx in selection
		19	+}
		20	+
		21	+
		22	+template<typename T>
		23	+void mtxMul(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
		24	+ //M3 (r1 x c2) = M1 (r1 x c1) * M2 (r2 x c2); element (i,j) of a matrix with C columns is addressed as [i * C + j]; r2 is not read
		25	+ for(size_t i = 0 ; i <r1; i++){
		26	+ for(size_t j = 0; j< c2; j++){
		27	+ T temp = 0;
		28	+ for(size_t k = 0; k < c1 ; k++){ //column1 = row2 for matrix multiplication
		29	+ temp+= M1[i * c1 + k] * M2[k * c2 + j]; //compute an element of output matrix
		30	+ }
		31	+ M3[i * c2 + j] = temp; //the output has c2 columns, so its row stride is c2 (was c1, wrong whenever c1 != c2)
		32	+ }
		33	+ }
		34	+}
		35	+
		36	+template<typename T>
		37	+void mtxMultranspose(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
		38	+ //M3 (r1 x r2) = M1 (r1 x c1) * transpose(M2), M2 being r2 x c2; element (i,j) of a matrix with C columns is addressed as [i * C + j]
		39	+ for(size_t i = 0 ; i <r1; i++){
		40	+ for(size_t j = 0; j< r2; j++){
		41	+ T temp = 0;
		42	+ for(size_t k = 0; k < c1 ; k++){ //dot product of row i of M1 with row j of M2 (c1 terms are read from each)
		43	+ temp+= M1[i * c1 + k] * M2[j * c2 + k]; //compute an element of output matrix
		44	+ }
		45	+ M3[i * r2 + j] = temp; //the output has r2 columns, so its row stride is r2 (was r1, wrong whenever r1 != r2)
		46	+ }
		47	+ }
		48	+}
		49	+
		50	+ //display within class scatter: prints the first f x f matrix stored in sw, one row per line
		51	+template<typename T>
		52	+void displayS(T* sw, size_t f){
		53	+
		54	+ for(size_t g = 0; g<1; g++){ //only matrix 0 is printed
		55	+ std::cout<<std::endl;
		56	+ for(size_t j = 0; j < f; j++){ //total number of features in a gnome
		57	+ for(size_t k = 0; k < f; k++){ //total number of features in a gnome
		58	+ std::cout<<sw[g*f*f + j*f + k]<<" "; //matrix g starts at offset g*f*f; entry (j,k) sits at j*f + k inside it
		59	+ }
		60	+ std::cout<<std::endl;
		61	+ }
		62	+ }
		63	+ std::cout<<std::endl;
		64	+}
		65	+
		66	+//sort eigenvalues from lapacke results
		67	+size_t* sortEigenVectorIndx(float* eigenvalue, size_t N){
		68	+ //returns a malloc'd array of the indices 0..N-1 ordered so that the eigenvalues are ascending (smallest first); the caller must free() it
		69	+ size_t* idx = (size_t*) malloc (N * sizeof (size_t));
		70	+ if (idx == NULL) return NULL; //allocation failed (or N == 0 on some platforms): nothing to sort
		71	+ for (size_t i = 0; i < N; i++)
		72	+ idx[i] = i;
		73	+ for (size_t i=0; i<N; i++){
		74	+ for (size_t j=i+1; j<N; j++){
		75	+ if (eigenvalue[idx[i]] > eigenvalue[idx[j]]){ //a later element is smaller, so move it forward
		76	+ std::swap (idx[i], idx[j]); //only the indices move; eigenvalue itself is left untouched
		77	+ }
		78	+ }
		79	+ }
		80	+
		81	+ if (N > 0) std::cout<<"best eigenvalue index: "<<eigenvalue[idx[0]]<<std::endl; //prints the value at the first sorted index; guarded so idx[0] is never read for an empty input
		82	+
		83	+ return idx; //use as sortSIdx in selection
		84	+
		85	+}

src/enviload.h 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/src/enviload.h
		1	+#include <iostream>
		2	+#include <fstream>
		3	+#include <thread>
		4	+#include <random>
		5	+#include <vector>
		6	+//#include <algorithm>
		7	+
		8	+#define NOMINMAX
		9	+
		10	+//stim libraries
		11	+#include <stim/envi/envi.h>
		12	+#include <stim/image/image.h>
		13	+#include <stim/parser/arguments.h>
		14	+#include <stim/ui/progressbar.h>
		15	+#include <stim/parser/filename.h>
		16	+//#include <stim/visualization/colormap.h>
		17	+#include <stim/parser/table.h>
		18	+
		19	+std::vector< stim::image<unsigned char> > C; //global list of class masks, one image per class; filled by ga_load_class_images and read by load_features
		20	+//loads spectral features into a feature matrix based on a set of class images (or masks); returns a malloc'd buffer of tP * B floats
		21	+float* load_features(size_t nC, size_t tP, size_t B, stim::envi E, std::vector< unsigned int > nP){
		22	+ float progress = 0; //initialize the progress bar variable (assigned in the loop below but never read in this function)
		23	+ unsigned long long bytes_fmat = sizeof(float) * tP * B; //calculate the number of bytes in the feature matrix
		24	+ std::cout<<"totalnumber of samples "<<tP<<std::endl;
		25	+ std::cout<<"Allocating space for the feature matrix: "<<tP<<" x "<<B<<" = "<<(float)bytes_fmat/(float)1048576<<"MB"<<std::endl;
		26	+ float* F = (float*) malloc(bytes_fmat); //allocate space for the sifted matrix (the result is not checked for NULL)
		27	+ std::cout<<"Loading Training Data ("<<nC<<" classes)"<<std::endl;
		28	+ //load all of the training spectra into an array
		29	+ unsigned long long F_idx = 0; //initialize the matrix index to 0
		30	+ //unsigned long long R_idx = 0;
		31	+ for(unsigned c = 0; c < nC; c++){ //for each class image
		32	+ std::cout<<"\tSifting class "<<c+1<<" = "<<nP[c]<<" pixels..."<<std::endl;
		33	+ // std::thread t1 = std::thread(progress_thread_envi, &E); //start the progress bar thread
		34	+ E.sift((void*)&F[F_idx], C[c].data(), true); //sift that class into the matrix at the proper location; presumably writes nP[c] * B floats - confirm against stim::envi::sift
		35	+ F_idx += nP[c] * B; //advance past the block reserved for class c
		36	+ progress = (float)(c+1) / (float)nC * 100;
		37	+ // t1.join();
		38	+ }
		39	+
		40	+ return F;
		41	+}
		42	+
		43	+/// Build the class-label (response) array: a malloc'd array of tP labels in which the first nP[0] entries are 1, the next nP[1] entries are 2, and so on
		44	+unsigned int* ga_load_responses(size_t tP, size_t nC, std::vector< unsigned int > nP){
		45	+ unsigned int* T = (unsigned int*)malloc(tP * sizeof(unsigned int)); //one label per training pixel; the caller owns the buffer
		46	+ size_t R_idx = 0; //index into the response array
		47	+ for(size_t c = 0; c < nC; c++){ //for each class image
		48	+ for(unsigned long long l = 0; l < nP[c]; l++){ //assign a response for all pixels of class c loaded in the training matrix
		49	+ T[R_idx + l] = (unsigned int)c+1; //labels are 1-based
		50	+ }
		51	+ R_idx += nP[c]; //increment the response vector index
		52	+ }
		53	+ return T;
		54	+}
		55	+
		56	+
		57	+//loads every class mask named by the "classes" argument into the global C and returns the per-class pixel counts
		58	+std::vector< unsigned int > ga_load_class_images(int argc, stim::arglist args, size_t* nC, size_t* tP){
		59	+ if(args["classes"].nargs() < 2){ //if fewer than two classes are specified, there's a problem
		60	+ std::cout<<"ERROR: training requires at least two class masks"<<std::endl;
		61	+ exit(1);
		62	+ }
		63	+ std::vector< unsigned int > nP;
		64	+ size_t num_images = args["classes"].nargs(); //count the number of class images
		65	+ //size_t num_images = args["rf"].nargs(); //count the number of class images
		66	+ //std::vector<std::string> filenames(num_images); //initialize an array of file names to store the names of the images
		67	+ std::string filename; //allocate space to store the filename for an image
		68	+ for(size_t c = 0; c < num_images; c++){ //for each image
		69	+ filename = args["classes"].as_string(c); //get the class image file name
		70	+ stim::image<unsigned char> image(filename); //load the image
		71	+ //push_training_image(image.channel(0), nC, tP, nP); //push channel zero (all class images are assumed to be single channel)
		72	+ C.push_back(image.channel(0));
		73	+ unsigned int npixels = (unsigned int)image.channel(0).nnz();
		74	+ nP.push_back(npixels); //push the number of pixels onto the pixel array
		75	+ *tP += npixels; //add to the running total of pixels (accumulates into the caller's value)
		76	+ *nC = *nC + 1; //count this class in the caller's counter (was "nC = nC + 1", which only moved the local pointer)
		77	+ }
		78	+
		79	+ return nP;
		80	+}
		81	+
		82	+void display_PixelfeatureNclass(float* F, unsigned int* T, size_t B, size_t Idx){
		83	+ //debug helper: prints the class label T[Idx], then the B feature values of pixel Idx read from F
		84	+ std::cout<<"class of pixel["<<Idx<<"]" <<"is: "<<T[Idx]<<std::endl;
		85	+ std::cout<<"feature["<<Idx<<"] is: "<<std::endl;
		86	+ const float* pixel = F + Idx * B; //first feature of the requested pixel
		87	+ for (size_t band = 0; band < B; ++band) std::cout<<" "<<pixel[band];
		88	+}
		89	+
		90	+
		91	+void display_args(int argc, stim::arglist args){ //debug helper: prints argc and the first two entries returned by args.arg()
		92	+ std::cout<<"number of arguments "<<argc<<std::endl;
		93	+ std::cout<<"arg 0 "<<args.arg(0)<<std::endl; //NOTE(review): args.arg(0) and args.arg(1) are read unconditionally - confirm both exist at every call site
		94	+ std::cout<<"arg 1 "<<args.arg(1)<<std::endl;
		95	+}
		96	+
		97	+void display_dataSize(size_t X, size_t Y, size_t B){
		98	+ const size_t samples = X * Y; //debug helper: the sample count is the product of the two extents
		99	+ std::cout<<"number of samples "<<samples<<std::endl;
		100	+ std::cout<<"number of bands "<<B<<std::endl;
		101	+}
		102	+
		103	+void display_phe(float* phe, unsigned int* P, size_t p,size_t f, size_t i, size_t j){
		104	+ //debug helper: for gnome i, prints each of its f feature indices next to the matching value of pixel j in phe
		105	+ std::cout<<"phe["<<i<<"]["<<j<<"]"<<std::endl;
		106	+ const unsigned int* gnome = P + i * f; //feature indices of gnome i
		107	+ const float* pixel = phe + i * (p * f) + j * f; //values of pixel j inside gnome i's block of phe
		108	+ for(unsigned int n = 0; n < f; n++){
		109	+ std::cout<<gnome[n]<<" "<<pixel[n]<<std::endl; //feature index, then the pixel value for that feature
		110	+ }
		111	+}
		112	+
		113	+
		114	+void display_gnome(unsigned int* P,size_t f,size_t gIdx){
		115	+ //debug helper: prints the f feature indices that make up gnome gIdx of the population P, space separated
		116	+ const unsigned int* gnome = P + gIdx * f; //first feature index of the requested gnome
		117	+ for (size_t k = 0; k < f; ++k) std::cout<<" "<<gnome[k];
		118	+}
		119	+

src/ga_gpu.cu 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/src/ga_gpu.cu
		1	+#ifndef GA_GPU_CU
		2	+#define GA_GPU_CU
		3	+
		4	+//#include <cuda.h>
		5	+//#include "cuda_runtime.h"
		6	+//#include <cuda_runtime_api.h>
		7	+//#include "device_launch_parameters.h"
		8	+#include <stim/cuda/cudatools/error.h>
		9	+
		10	+#include "timer.h"
		11	+//#include <stdio.h>
		12	+//#include <stdlib.h>
		13	+#include <iostream>
		14	+#include <fstream>
		15	+
		16	+extern Timer timer;
		17	+
		18	+
		19	+__global__ void kernel_computeSb(float* gpuSb, unsigned int* gpuP, float* gpuM, float* gpuCM, size_t ub, size_t f, size_t p, size_t nC, unsigned int* gpu_nPxInCls){
		20	+ //each thread adds the between-class scatter contribution of every class to one entry of one gnome's f x f matrix in gpuSb
		21	+ size_t i = blockIdx.x * blockDim.x + threadIdx.x; //first feature position inside the gnome (bounded by f)
		22	+ size_t j = blockIdx.y * blockDim.y + threadIdx.y; //second feature position inside the gnome (bounded by f)
		23	+ size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //gnome index in the population (bounded by p)
		24	+
		25	+
		26	+ if(gnomeIndx >= p || i >= f || j >= f) return; //threads outside the p x f x f range have nothing to do
		27	+
		28	+ //form a sb matrix from vector sbVec, multiply each element in matrix with num of pixels in the current class
		29	+ //and add it to previous value of between class scatter matrix sb
		30	+ float tempsbval;
		31	+ size_t n1;
		32	+ size_t n2;
		33	+ size_t classIndx; //class index in class mean matrix
		34	+
		35	+ for(size_t c = 0; c < nC; c++){
		36	+ tempsbval = 0;
		37	+ classIndx = c * ub; //offset of class c's mean vector (ub entries per class)
		38	+ n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix
		39	+ n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix
		40	+ tempsbval = ((gpuCM[classIndx + n1] - gpuM[n1]) * (gpuCM[classIndx + n2] - gpuM[n2])) * (float)gpu_nPxInCls[c] ;
		41	+ gpuSb[gnomeIndx * f * f + j * f + i] += tempsbval; //NOTE(review): accumulates with +=, so gpuSb presumably has to be zeroed before launch - confirm at the call site
		42	+ }
		43	+}
		44	+
		45	+
		46	+//Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
		47	+__global__ void kernel_computeSw(float* gpuSw, unsigned int* gpuP, float* gpuCM, float* gpuF, unsigned int* gpuT, size_t ub, size_t f, size_t p, size_t nC, size_t tP){
		48	+ size_t i = blockIdx.x * blockDim.x + threadIdx.x; //first feature position inside the gnome (bounded by f)
		49	+ size_t j = blockIdx.y * blockDim.y + threadIdx.y; //second feature position inside the gnome (bounded by f)
		50	+ size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //gnome index in the population (bounded by p)
		51	+
		52	+ if(gnomeIndx >= p || i >= f || j >= f) return; //threads outside the p x f x f range have nothing to do
		53	+ float tempswval;
		54	+
		55	+ size_t n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix
		56	+ size_t n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix
		57	+ tempswval = 0;
		58	+ for(size_t c = 0; c < nC; c++){
		59	+ tempswval = 0; //restart the sum for class c
		60	+ for(size_t k = 0; k < tP; k++){ //every thread scans all tP samples for each class
		61	+ if(gpuT[k] == (c+1) ){ //labels are 1-based, so class c is stored as c+1
		62	+ tempswval += ((gpuF[ k * ub + n1] - gpuCM[c * ub + n1]) * (gpuF[k * ub + n2] - gpuCM[c * ub + n2]));
		63	+ }
		64	+ }
		65	+ gpuSw[gnomeIndx * f * f + j * f + i] += tempswval; //NOTE(review): accumulates with +=, so gpuSw presumably has to be zeroed before launch - confirm at the call site
		66	+ }
		67	+}
		68	+
		69	+
		70	+
		71	+
		72	+ //=============================gpu intialization=============================================
		73	+ /// Initialize all GPU pointers used in the GA-GPU algorithm
		74	+ /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
		75	+ /// @param p is the population size
		76	+ /// @param f is the number of desired features
		77	+ /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
		78	+ /// @param cpuM is a pointer to the class mean on the CPU
		79	+ /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
		80	+ /// @param cpu_nPxInCls is a CPU array storing the number of pixels in each class
		81	+ /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
		82	+ /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
		83	+ /// @param gpuF is the destination for the GPU feature matrix
		84	+ /// @param cpuF is the complete feature matrix on the CPU
		85	+
		86	+ void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
		87	+ float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
		88	+ float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
		89	+ float gpuSb, float gpuSw,
		90	+ float** gpuF, float* cpuF,
		91	+ unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls){
		92	+
		93	+ HANDLE_ERROR(cudaMalloc(gpuP, p * f * sizeof(unsigned int))); //allocate space for the population on the GPU
		94	+
		95	+ HANDLE_ERROR(cudaMalloc(gpuCM, nC * ub * sizeof(float))); //allocate space for the class mean and copy it to the GPU
		96	+ HANDLE_ERROR(cudaMemcpy(gpuCM, cpuCM, nC ub * sizeof(float), cudaMemcpyHostToDevice));
		97	+
		98	+
		99	+ HANDLE_ERROR(cudaMalloc(gpuM, ub * sizeof(float))); //allocate space for the mean of the feature matrix
		100	+ HANDLE_ERROR(cudaMemcpy(gpuM, cpuM, ub sizeof(float), cudaMemcpyHostToDevice));
		101	+
		102	+ HANDLE_ERROR(cudaMalloc(gpu_nPxInCls, nC * sizeof(unsigned int))); //number of pixels in each class
		103	+ HANDLE_ERROR(cudaMemcpy(gpu_nPxInCls, cpu_nPxInCls, nC sizeof(unsigned int), cudaMemcpyHostToDevice));
		104	+
		105	+
		106	+ HANDLE_ERROR(cudaMalloc(gpuSb, p * f * f * sizeof(float))); //allocate memory for sb which is calculated for eery class separately and added together in different kernel
		107	+ HANDLE_ERROR(cudaMalloc(gpuSw, p * f * f * sizeof(float)));
		108	+
		109	+ HANDLE_ERROR(cudaMalloc(gpuF, tP * ub * sizeof(float)));
		110	+ HANDLE_ERROR(cudaMemcpy(gpuF, cpuF, tP ub * sizeof(float), cudaMemcpyHostToDevice));
		111	+
		112	+ HANDLE_ERROR(cudaMalloc(gpuT, tP * sizeof(unsigned int)));
		113	+ HANDLE_ERROR(cudaMemcpy(gpuT, cpuT, tP sizeof(unsigned int), cudaMemcpyHostToDevice));
		114	+
		115	+ }
		116	+
		//computation on GPU
		/// Upload the current population, compute the between-class (Sb) and within-class (Sw) scatter
		/// matrices of every gnome on the GPU and copy both results back to the CPU.
		/// @param gpuP is the device buffer for the population; it is overwritten with cpuP
		/// @param cpuP is the population matrix (p x f) on the CPU
		/// @param p is the population size
		/// @param f is the number of desired features
		/// @param gpuSb is a GPU memory pointer to the between-class scatter matrices (zeroed here before use)
		/// @param cpuSb is the between-class scatter matrix on the CPU (this function will copy the GPU result there)
		/// @param gpuSw is a GPU memory pointer to the within-class scatter matrices (zeroed here before use)
		/// @param cpuSw is the within-class scatter matrix on the CPU (this function will copy the GPU result there)
		/// @param gpuF is the feature matrix on the GPU
		/// @param gpuT is the class label array on the GPU
		/// @param gpuM is the total mean on the GPU
		/// @param gpuCM is the class mean matrix on the GPU
		/// @param nC is the number of classes
		/// @param tP is the total number of samples
		/// @param props describes the device; maxThreadsPerBlock and sharedMemPerBlock are used for the launch configuration
		/// @param gen is the current generation, @param gnrtn the total number of generations (timings are reported near the end of the run)
		/// @param ub is the number of features per sample in the full feature matrix
		/// @param gpu_nPxInCls is the number of pixels in each class on the GPU
		/// @param profilefile receives the same timing lines that are printed to std::cout
		void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
			float* gpuSb, float* cpuSb,
			float* gpuSw, float* cpuSw,
			float* gpuF, unsigned int* gpuT, float* gpuM, float* gpuCM,
			size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile){

			if(p == 0 || f == 0) return;												//nothing to compute; a zero-sized grid would be an invalid launch

			const bool report = (gen + 2 > gnrtn);										//same as gen > gnrtn - 2, but cannot underflow when gnrtn < 2

			timer.start();
			HANDLE_ERROR(cudaMemcpy(gpuP, cpuP, p * f * sizeof(unsigned int), cudaMemcpyHostToDevice));
			HANDLE_ERROR(cudaMemset(gpuSb, 0, p * f * f * sizeof(float)));

			//grid configuration of GPU: square blocks over the f x f matrix, one grid layer (z) per gnome
			size_t threads = (size_t)sqrt(props.maxThreadsPerBlock);
			if(threads > f) threads = f;
			if(threads == 0) threads = 1;
			size_t numberofblocksfor_f = (f + threads - 1) / threads;					//ceil(f / threads)
			dim3 blockdim((int)threads, (int)threads, 1);
			dim3 griddim((int)numberofblocksfor_f, (int)numberofblocksfor_f, (int)p);
			//sharedbytes calculation
			size_t sharedBytes = p * f * sizeof(unsigned int);							//room for the population, capped to what a block may use
			if(props.sharedMemPerBlock < sharedBytes) sharedBytes = props.sharedMemPerBlock;

			//launch kernel to compute sb matrix
			kernel_computeSb<<<griddim, blockdim, sharedBytes>>>(gpuSb, gpuP, gpuM, gpuCM, ub, f, p, nC, gpu_nPxInCls);
			HANDLE_ERROR(cudaGetLastError());											//launch configuration errors
			HANDLE_ERROR(cudaDeviceSynchronize());										//errors raised while the kernel was running

			HANDLE_ERROR(cudaMemcpy(cpuSb, gpuSb, p * f * f * sizeof(float), cudaMemcpyDeviceToHost));	//copy between class scatter from gpu to cpu
			const auto elapsedg1 = timer.time_elapsed();
			if(report){
				std::cout << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
				profilefile << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
			}

			timer.start();
			//Compute within class scatter
			HANDLE_ERROR(cudaMemset(gpuSw, 0, p * f * f * sizeof(float)));

			//launch kernel to compute sw matrix
			kernel_computeSw<<<griddim, blockdim>>>(gpuSw, gpuP, gpuCM, gpuF, gpuT, ub, f, p, nC, tP);
			HANDLE_ERROR(cudaGetLastError());
			HANDLE_ERROR(cudaDeviceSynchronize());
			//copy within class scatter from gpu to cpu
			HANDLE_ERROR(cudaMemcpy(cpuSw, gpuSw, p * f * f * sizeof(float), cudaMemcpyDeviceToHost));
			const auto elapsedg2 = timer.time_elapsed();
			if(report){
				std::cout << "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
				profilefile<< "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
			}

		}
		181	+
		/// Release every GPU buffer used by the algorithm.
		/// The buffers are freed in the order: population, class mean, total mean, pixels per class, Sb, Sw, features, labels.
		void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT){

			void* buffers[] = { gpuP, gpuCM, gpuM, gpu_nPxInCls, gpuSb, gpuSw, gpuF, gpuT };
			const size_t count = sizeof(buffers) / sizeof(buffers[0]);
			for(size_t b = 0; b < count; b++){
				HANDLE_ERROR(cudaFree(buffers[b]));
			}
		}
		194	+
		195	+#endif
		196	+

src/ga_gpu.h 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/src/ga_gpu.h
		1	+#ifndef GA_GPU_H
		2	+#define GA_GPU_H
		3	+
		4	+#include <iostream>
		5	+#include <thread>
		6	+#include <complex>
		7	+#include <cv.h>
		8	+#include <stdio.h>
		9	+#include <stdlib.h>
		10	+#include <iostream>
		11	+
		12	+#include "timer.h"
		13	+
		14	+#include "basic_functions.h"
		15	+//LAPACKE support for Visual Studio
		16	+
		17	+#ifndef LAPACK_COMPLEX_CUSTOM
		18	+#define LAPACK_COMPLEX_CUSTOM
		19	+#define lapack_complex_float std::complex<float>
		20	+#define lapack_complex_double std::complex<double>
		21	+#include "lapacke.h"
		22	+#endif
		23	+
		24	+
		25	+#define LAPACK_ROW_MAJOR 101
		26	+#define LAPACK_COL_MAJOR 102
		27	+
		28	+//CUDA functions
		29	+void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
		30	+ float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
		31	+ float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
		32	+ float gpuSb, float gpuSw,
		33	+ float** gpuF, float* cpuF,
		34	+ unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls);
		35	+void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
		36	+ float* gpuSb, float* cpuSb,
		37	+ float* gpuSw, float* cpuSw,
		38	+ float* gpuF, unsigned int* T, float* gpuM, float* gpuCM,
		39	+ size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile);
		40	+void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT);
		41	+
		42	+struct _fcomplex { float re, im; };
		43	+typedef struct _fcomplex fcomplex;
		44	+
		45	+Timer timer;
		46	+
		47	+class ga_gpu {
		48	+
		49	+public:
		50	+ float* F; //pointer to the raw data in host memory
		51	+ unsigned int* T; //pointer to the class labels in host memory
		52	+ size_t gnrtn; //total number of generations
		53	+ size_t p; //population size
		54	+ size_t f; // number of features to be selected
		55	+
		56	+ unsigned int* P; //pointer to population of current generation genotype matrix (p x f)
		57	+ float* S; //pointer to score(fitness value) of each gnome from current population matric P
		58	+ unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population
		59	+ unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F)
		60	+ unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0)
		61	+ float uniformRate;
		62	+ float mutationRate;
		63	+ size_t tournamentSize; //number of potential gnomes to select parent for crossover
		64	+ bool elitism; //if true then passes best gnome to next generation
		65	+
		66	+ //declare gpu pointers
		67	+ float* gpuF; //Feature matrix
		68	+ unsigned int* gpuT; //target responses of entire feature matrix
		69	+ unsigned int* gpuP; //population matrix
		70	+ unsigned int* gpu_nPxInCls;
		71	+ float* gpuCM; //class mean of entire feature matrix
		72	+ float* gpuM; //total mean of entire feature matrix
		73	+ float* gpuSb; //between class scatter for all individuals of current population
		74	+ float* gpuSw; //within class scatter for all individuals of current population
		75	+
		76	+ //constructor
		77	+ ga_gpu() {}
		78	+
		79	+ //==============================generate initial population
		80	+
		81	+ void initialize_population(std::vector<unsigned int> i_guess, bool debug) {
		82	+ if (debug) {
		83	+ std::cout << std::endl;
		84	+ std::cout << "initial populatyion is: " << std::endl;
		85	+ }
		86	+
		87	+ lb = 0;
		88	+ P = (unsigned int)calloc(p f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f
		89	+ S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P
		90	+
		91	+ srand(1);
		92	+ //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess
		93	+ std::memcpy(P, i_guess.data(), f * sizeof(unsigned int));
		94	+
		95	+ //generate random initial population
		96	+ for (size_t i1 = 1; i1 < p; i1++) {
		97	+ for (size_t i2 = 0; i2 < f; i2++) {
		98	+ P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B)
		99	+ if (debug) std::cout << P[i1 * f + i2] << "\t";
		100	+ }
		101	+ if (debug) std::cout << std::endl;
		102	+ }
		103	+ }
		104	+
		105	+ //===================generation of new population==========================================
		106	+
		107	+ size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) {
		108	+
		109	+ //gget index of best gnome in the current population
		110	+ size_t bestG_Indx = gIdxbestGnome();
		111	+ //-------------(reproduction)-------
		112	+ if (elitism) {
		113	+ saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation
		114	+ }
		115	+ // ------------Crossover population---------------
		116	+ int elitismOffset;
		117	+ if (elitism) {
		118	+ elitismOffset = 1;
		119	+ }
		120	+ else {
		121	+ elitismOffset = 0;
		122	+ }
		123	+
		124	+ //Do crossover for rest of population size
		125	+ for (int i = elitismOffset; i <p; i++) {
		126	+ // std::cout<<"crossover of gnome "<<i<<std::endl;
		127	+ std::vector<unsigned int>gnome1;
		128	+ gnome1.reserve(f);
		129	+ gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
		130	+ // displaygnome(gnome1);
		131	+ std::vector<unsigned int>gnome2;
		132	+ gnome2.reserve(f);
		133	+ gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
		134	+ // displaygnome(gnome2);
		135	+ std::vector<unsigned int>gnome;
		136	+ gnome.reserve(f);
		137	+ gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome
		138	+ // displaygnome(gnome);
		139	+ saveGnome(i, gnome, newPop); //save crosseover result to new population
		140	+ }
		141	+
		142	+ //--------------Mutate population------------
		143	+ // introduce some mutation in new population
		144	+ for (int i = elitismOffset; i <p; i++) {
		145	+ //std::cout<<"mutation of gnome"<<std::endl;
		146	+ std::vector<unsigned int>gnome;
		147	+ gnome.reserve(f);
		148	+
		149	+ for (size_t n = 0; n < f; n++)
		150	+ gnome.push_back(newPop[i*f + n]);
		151	+ //std::cout<<"\n starting address "<<(&newPop[0] + if)<<"\t end address is "<<(&newPop[0] + if + f-1) <<std::endl;
		152	+ //std::copy((&newPop[0] + if), (&newPop[0] + if +f-1), gnome.begin());
		153	+ // displaygnome(gnome);
		154	+ mutate(gnome);
		155	+ // displaygnome(gnome);
		156	+ saveGnome(i, gnome, newPop); //save new gnome to new population at position i
		157	+ }
		158	+ return bestG_Indx;
		159	+ }
		160	+
		161	+ //============================== functions for population evolution ===========================================================================
		162	+ std::vector<unsigned int> tournamentSelection(size_t tSize) {
		163	+ // Create a tournament population
		164	+ unsigned int* tournamentP = (unsigned int)malloc(tSize f * sizeof(unsigned int));
		165	+ std::vector<float>tournamentS;
		166	+
		167	+ // For each place in the tournament get a random individual
		168	+ for (size_t i = 0; i < tSize; i++) {
		169	+ size_t rndmIdx = rand() % p + lb;
		170	+ tournamentS.push_back(S[rndmIdx]);
		171	+ //for (size_t n = 0; n <f; n++)
		172	+ //tournamentP[i * f + n] = (getGnome(rndmIdx)).at(n);
		173	+ std::vector<unsigned int> temp_g(getGnome(rndmIdx));
		174	+ std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f);
		175	+ }
		176	+ // Get the fittest
		177	+ std::vector<unsigned int>fittestgnome;
		178	+ fittestgnome.reserve(f);
		179	+
		180	+ //select index of best gnome from fitness score
		181	+ size_t bestSIdx = 0;
		182	+ for (size_t i = 0; i < tSize; i++) {
		183	+ if (tournamentS[i] < tournamentS[bestSIdx])
		184	+ bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error
		185	+ }
		186	+
		187	+ for (size_t n = 0; n < f; n++)
		188	+ fittestgnome.push_back(tournamentP[bestSIdx * f + n]);
		189	+ return fittestgnome;
		190	+ } //end of tournament selection
		191	+
		192	+
		193	+ std::vector<unsigned int> crossover(std::vector<unsigned int> gnome1, std::vector<unsigned int> gnome2, float* M) {
		194	+ std::vector<unsigned int> gnome;
		195	+ for (size_t i = 0; i < f; i++) {
		196	+ // Crossover
		197	+ float r = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
		198	+ if (r <= uniformRate) {
		199	+ gnome.push_back(gnome1.at(i));
		200	+ }
		201	+ else {
		202	+ gnome.push_back(gnome2.at(i));
		203	+ }
		204	+ }
		205	+
		206	+ //check new gnome for all zero bands and duplicated values
		207	+ std::vector<unsigned int> gnomeunique;
		208	+ int flag = 0;
		209	+ std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
		210	+ std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique));
		211	+ /* if(gnomeunique.size()< gnome.size()){
		212	+ flag = 1;
		213	+ std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
		214	+ }*/
		215	+ unsigned int featureband, featureband1, featureband2;
		216	+ if (gnomeunique.size() < f) {
		217	+ for (size_t k = gnomeunique.size(); k < f; k++) {
		218	+ featureband = rand() % ub + lb;
		219	+ for (size_t i = 0; i < f; i++) {
		220	+ featureband1 = gnome1.at(i);
		221	+ featureband2 = gnome2.at(i);
		222	+ for (size_t j = 0; j < gnomeunique.size(); j++) {
		223	+ if (gnomeunique.at(j) != featureband1) {
		224	+ featureband = featureband1;
		225	+ }
		226	+ else if (gnomeunique.at(j) != featureband2) {
		227	+ featureband = featureband2;
		228	+ }
		229	+ else if (gnomeunique.at(j) == featureband) {
		230	+ featureband = rand() % ub + lb;
		231	+ while (M[featureband] == 0) {
		232	+ featureband = rand() % ub + lb;
		233	+ }
		234	+ }
		235	+ }
		236	+ }
		237	+ gnomeunique.push_back(featureband);
		238	+ }
		239	+ }
		240	+ //if(flag ==1){
		241	+ // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
		242	+ // for(int k = 0; k < gnome.size(); k++)
		243	+ // std::cout<<gnome[k]<<"\t";
		244	+ // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
		245	+ // for(int k = 0; k < gnomeunique.size(); k++)
		246	+ // std::cout<<gnomeunique[k]<<"\t";
		247	+ //}
		248	+
		249	+ return gnomeunique;
		250	+ }
		251	+
		252	+ void mutate(std::vector<unsigned int> gnome) {
		253	+ for (size_t i = 0; i < f; i++) {
		254	+ float LO = (float)0.01;
		255	+ float HI = 1;
		256	+ float r3 = LO + static_cast <float> (rand()) / (static_cast <float> (RAND_MAX / (HI - LO)));
		257	+ //if random value is less than mutationRate then mutate this gnome
		258	+ if (r3 <= mutationRate) {
		259	+ gnome.at(i) = (rand() % ub + lb);
		260	+ gnome.push_back(rand() % ub + lb);
		261	+ }
		262	+ }
		263	+ }
		264	+
		265	+ ///returns gnome of given index
		266	+ std::vector<unsigned int> getGnome(size_t idx) {
		267	+ std::vector<unsigned int> gnome;
		268	+ gnome.reserve(f);
		269	+ //pulling gnome idx from population P
		270	+ for (size_t n = 0; n < f; n++)
		271	+ gnome.push_back(P[idx * f + n]);
		272	+ //memcpy(&gnome[0], P+idxf, fsizeof(size_t));
		273	+ return gnome;
		274	+ }
		275	+
		276	+ //save gnome of index gIdx from previous population at position i in the new population
		277	+ void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) {
		278	+ for (size_t n = 0; n < f; n++)
		279	+ newPop[i * f + n] = P[gIdx * f + n];
		280	+ }
		281	+
		282	+ void saveGnome(size_t idx, std::vector<unsigned int>gnome, unsigned int* newPop) {
		283	+ std::copy(gnome.begin(), gnome.end(), newPop + idx*f);
		284	+ }
		285	+
		286	+ size_t gIdxbestGnome() {
		287	+ //std::cout<<"best gnome indes is: "<<sortSIndx()[0];
		288	+ return sortSIndx()[0];
		289	+ }
		290	+
		//print all genes of a gnome on one line, each followed by a space
		void displaygnome(std::vector<unsigned int> gnome) {
			std::cout << "\t gnome: ";
			for (std::vector<unsigned int>::const_iterator gene = gnome.begin(); gene != gnome.end(); ++gene)
				std::cout << *gene << ' ';
			std::cout << std::endl;
		}
		297	+
		298	+ //---------------------post processing of score-------------------------------------
		299	+ void Snorm() { //normalize gnome scores
		300	+ double s;
		301	+ for (size_t i = 0; i < p; i++) {
		302	+ s += S[i]; //sum of all gnome score in population
		303	+ }
		304	+ //std::cout<<"mean Score is: "<<(double) s/p;
		305	+ for (size_t i = 0; i <p; i++)
		306	+ S[i] = S[i] / s;
		307	+ }
		308	+
		309	+ size_t* sortSIndx() { //sort gnome index according to gnome scores
		310	+ //sort indices of score in ascending order (fitness value)
		311	+ size_t idx = (size_t)malloc(p * sizeof(size_t)); //array to hold sorted gnome index
		312	+ for (size_t i = 0; i < p; i++) { //initialize index array from 1 to p(population size) in an ascending order
		313	+ idx[i] = i;
		314	+ }
		315	+
		316	+ for (size_t i = 0; i<p; i++) { //sort gnome indices according to score values using bubble sort
		317	+ for (size_t j = i + 1; j<p; j++) {
		318	+ if (S[idx[i]] > S[idx[j]]) {
		319	+ std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
		320	+ }
		321	+ }
		322	+ }
		323	+
		324	+ //display best gnome
		325	+ //std::cout << "best fitness value: " << S[idx[0]] << std::endl;
		326	+ /*if (S[idx[0]] < 0) {
		327	+ std::cout << "best gnome is " << std::endl;
		328	+ for (size_t i = 0; i < f; i++)
		329	+ std::cout << P[f * idx[0] + i] << ", ";
		330	+ std::cout << std::endl;
		331	+ }*/
		332	+
		333	+ return idx; //use as sortSIdx in selection
		334	+ }
		335	+
		336	+
		337	+ //size_t* sortIndx(float* input, size_t size) {
		338	+ // //sort indices of score in ascending order (fitness value)
		339	+ // size_t *idx;
		340	+ // idx = (size_t)malloc(size sizeof(size_t));
		341	+ // for (size_t i = 0; i < size; i++)
		342	+ // idx[i] = i;
		343	+
		344	+ // for (size_t i = 0; i<size; i++) {
		345	+ // for (size_t j = i + 1; j<size; j++) {
		346	+ // if (input[idx[i]] < input[idx[j]]) {
		347	+ // std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
		348	+ // }
		349	+ // }
		350	+ // }
		351	+ // return idx; //use as sortSIdx in selection
		352	+
		353	+ //}
		354	+
		355	+ void generateNewP(unsigned int* newPop) {
		356	+ //std::memcpy(P, 0 , p * f *sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
		357	+ std::memcpy(P, newPop, p * f * sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
		358	+ }
		359	+
		360	+ //============================== functions for fitness function ===========================================================================
		361	+ //compute total mean M (1 X B) of all features (tP X B)
		362	+ void ttlMean(float* M, size_t tP, size_t B) {
		363	+ //std::cout<<"total number of pixels are "<<tP<<std::endl;
		364	+ for (int k = 0; k < tP; k++) { //total number of pixel in feature matrix
		365	+ for (size_t n = 0; n < B; n++) { // index of feature in ith gnome
		366	+ M[n] += F[k * B + n];
		367	+ }
		368	+ }
		369	+ for (size_t n = 0; n < B; n++) //take an avarage of above summation
		370	+ M[n] = M[n] / (float)tP;
		371	+ }
		372	+
		373	+ void dispalymean(float* M) { //display mean
		374	+ std::cout << std::endl;
		375	+ std::cout << "Total mean of gnome 1 features are is " << std::endl;
		376	+
		377	+ for (size_t i = 0; i < 1; i++) {
		378	+ for (size_t j = 0; j < f; j++) {
		379	+ size_t index = P[i*f + j];
		380	+ std::cout << "feature index " << index << "\t total mean" << M[index] << std::endl;
		381	+ }
		382	+ }
		383	+ std::cout << std::endl;
		384	+ }
		385	+
		386	+ //Compute class means cM (p x nC x f) of all gnome features phe(tP x f)
		387	+ void classMean(float* cM, size_t tP, size_t nC, size_t B, std::vector<unsigned int> nPxInCls) {
		388	+ for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses
		389	+ float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c'
		390	+ for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix
		391	+ if (T[k] == c + 1) { //class numbers start from 1 not 0
		392	+ for (size_t n = 0; n < B; n++) { //total number of features in a gnome
		393	+ tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome
		394	+ }
		395	+ }
		396	+ }
		397	+ for (size_t n = 0; n < B; n++)
		398	+ cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c'
		399	+
		400	+ }
		401	+
		402	+ }
		403	+
		404	+ //display class mean
		405	+ void dispalyClassmean(float* cM, size_t nC) {
		406	+ std::cout << std::endl;
		407	+ std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl;
		408	+ for (size_t i = 0; i < 1; i++) {
		409	+ for (size_t c = 0; c < nC; c++) {
		410	+ for (size_t j = 0; j < f; j++) {
		411	+ size_t index = P[i*f + j];
		412	+
		413	+ std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl;
		414	+ }
		415	+ }
		416	+ }
		417	+ std::cout << std::endl;
		418	+ }
		419	+
		420	+ //-----------------------------------------between and within class Scattering computation---------------------------------------------------------------
		421	+ //computation on CPU
		422	+ void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector<unsigned int> nPxInCls) {
		423	+ timer.start();
		424	+ computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU
		425	+ const auto elapsed = timer.time_elapsed();
		426	+ std::cout << "Sb CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << "us" << std::endl;
		427	+
		428	+ timer.start();
		429	+ computeSw(sw, cM, nC, tP); //compute within class scatter on CPU
		430	+ const auto elapsed1 = timer.time_elapsed();
		431	+ std::cout << "Sw CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
		432	+ }
		433	+
		434	+ //display between class scatter
		435	+ void displaySb(float* sb) {
		436	+ std::cout << "between scatter is " << std::endl;
		437	+ for (size_t g = 0; g<1; g++) {
		438	+ std::cout << std::endl;
		439	+ for (size_t j = 0; j < f; j++) { //total number of features in a gnome
		440	+ for (size_t k = 0; k < f; k++) { //total number of features in a gnome
		441	+ std::cout << sb[g * f * f + j * f + k] << " ";
		442	+ }
		443	+ std::cout << std::endl;
		444	+ }
		445	+ }
		446	+ std::cout << std::endl;
		447	+ }
		448	+
		449	+ //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f)
		450	+ void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector<unsigned int> nPxInCls) {
		451	+ float tempsbval;
		452	+ size_t n1;
		453	+ size_t n2;
		454	+ size_t classIndx; //class index in class mean matrix
		455	+ /*std::cout <<"population of computation of cpusb "<< std::endl;
		456	+ for (size_t i2 = 0; i2 < f; i2++) {
		457	+ std::cout << P[i2] << "\t";
		458	+ }*/
		459	+
		460	+ for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
		461	+ for (size_t c = 0; c < nC; c++) {
		462	+ for (size_t i = 0; i < f; i++) {
		463	+ for (size_t j = 0; j < f; j++) {
		464	+ tempsbval = 0;
		465	+ classIndx = c * ub;
		466	+ n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix
		467	+ n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix
		468	+ // std::cout << "i: " << i << " j: " <<j<< " n1: " << n1 << " n2:" << n2 << std::endl;
		469	+ tempsbval = ((cM[classIndx + n1] - M[n1]) *(cM[classIndx + n2] - M[n2]));
		470	+ sb[gnomeIndx * f * f + i * f + j] += tempsbval * (float)nPxInCls[c]; // compute tempsb[j][k] element of class 'c' of gnome 'i'
		471	+ }
		472	+ }
		473	+ }
		474	+ }
		475	+
		476	+ }
		477	+
		478	+ //Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
		479	+ void computeSw(float* sw, float* cM, size_t nC, size_t tP) {
		480	+ float tempswval;
		481	+ size_t n1;
		482	+ size_t n2;
		483	+ size_t cMclass; //class index in class mean matrix
		484	+ size_t Pg;
		485	+ size_t swg;
		486	+ size_t pheg;
		487	+ for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
		488	+ Pg = gnomeIndx * f;
		489	+ swg = gnomeIndx * f * f;
		490	+ pheg = gnomeIndx * tP * f;;
		491	+ for (size_t c = 0; c < nC; c++) {
		492	+ cMclass = c * ub;
		493	+
		494	+ for (size_t k = 0; k < tP; k++) {
		495	+ if (T[k] == (c + 1)) {
		496	+ for (size_t i = 0; i < f; i++) {
		497	+ for (size_t j = 0; j < f; j++) {
		498	+ n1 = P[Pg + i]; //actual feature index in original feature matrix
		499	+ n2 = P[Pg + j]; //actual feature index in original feature matrix
		500	+
		501	+ tempswval = 0;
		502	+ tempswval = ((F[k * ub + n1] - cM[cMclass + n1]) * (F[k * ub + n2] - cM[cMclass + n2]));
		503	+ //tempswval = ((phe[gnomeIndx * tP * f + k * f + i] - cM[c * ub + P[gnomeIndx * f + i]]) * (phe[gnomeIndx * tP f + k f + j] - cM[c * ub + P[gnomeIndx * f + j]]));
		504	+ sw[gnomeIndx * f * f + i * f + j] += tempswval;
		505	+ }
		506	+ }
		507	+ }
		508	+ }
		509	+ }
		510	+
		511	+ }
		512	+ }
		513	+ //checking bands with all zeros and replacing duplicated bands in gnome but this function is only for initial population
		514	+ //void zerobandcheck(float* M, bool initial) {
		515	+ // for (size_t g = 0; g < p; g++) { // for each gnome
		516	+ // for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
		517	+ // while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
		518	+ // P[g * f + i] = rand() % ub + lb;
		519	+ // }
		520	+ // }
		521	+ // //checking for duplicats in a gnome
		522	+ // std::vector<unsigned int> gnome = getGnome(g);
		523	+ // std::vector<unsigned int> gnomeunique;
		524	+ // int flag = 0; //flag will be set if gnome has duplicated band (feature) index
		525	+ // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
		526	+ // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies
		527	+ // if (gnomeunique.size()< gnome.size()) {
		528	+ // flag = 1; //set flag for those if there are duplicated indices
		529	+ // //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
		530	+ // }
		531	+
		532	+ // //adding extra random feature indices to unique copy of gnome to achive gnome size = f
		533	+ // if (gnomeunique.size() < f) {
		534	+ // for (size_t k = gnomeunique.size(); k < f; k++) {
		535	+ // unsigned int rnumber = rand() % ub + lb;
		536	+ // //check if this randomaly generated number is already present in that gnome or not
		537	+ // for (size_t j = 0; j < gnomeunique.size(); j++) {
		538	+ // if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
		539	+ // rnumber = rand() % ub + lb;
		540	+ // j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
		541	+ // }
		542	+ // }
		543	+ // gnomeunique.push_back(rnumber); //add feature index to gnomeunique
		544	+ // }
		545	+ // }
		546	+ // std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f);
		547	+ // }
		548	+ //}
		549	+
		550	+ //checking bands with all zeros and replacing duplicated bands in gnome
		551	+ void zerobandcheck(float* M, bool initialPop) {
		552	+ size_t startgnome;
		553	+ if (initialPop) {
		554	+ startgnome = 0; //for initial population check all gnomes
		555	+ }
		556	+ else {
		557	+ startgnome = 1; //for next generations start gnome check after elite children offset
		558	+ }
		559	+ for (size_t g = startgnome; g < p; g++) { // for each gnome except
		560	+
		561	+ for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
		562	+ while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
		563	+ P[g * f + i] = rand() % ub + lb;
		564	+ }
		565	+ }
		566	+ //checking for duplicats in a gnome
		567	+ std::vector<unsigned int> gnome = getGnome(g); //get current gnome g from population matrix P
		568	+ std::vector<unsigned int> gnomeunique; //array to store only unique band indicies in a genome
		569	+ int flag = 0; //flag will be set if gnome has duplicated band (feature) index
		570	+ std::sort(gnome.begin(), gnome.end()); //sort current gnome
		571	+ std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome
		572	+ if (gnomeunique.size()< gnome.size()) {
		573	+ flag = 1; //set flag for those if there are duplicated indices
		574	+ //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
		575	+ }
		576	+
		577	+ //adding extra random feature indices to unique copy of gnome to achive gnome size = f
		578	+ if (gnomeunique.size() < f) {
		579	+ for (size_t k = gnomeunique.size(); k < f; k++) {
		580	+ unsigned int rnumber = rand() % ub + lb;
		581	+ //check if this randomaly generated number is already present in that gnome or not
		582	+ for (size_t j = 0; j < gnomeunique.size(); j++) {
		583	+ if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
		584	+ rnumber = rand() % ub + lb; //generate random number between upper bound and lower bound (ub. lb)
		585	+ j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
		586	+ }
		587	+ }
		588	+ gnomeunique.push_back(rnumber); //add feature index to gnomeunique
		589	+ }
		590	+ }
		591	+
		592	+ //diplay loop only if gnome has duplicated indices
		593	+ //if(flag ==1){
		594	+ // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
		595	+ // for(int k = 0; k < gnome.size(); k++)
		596	+ // std::cout<<gnome[k]<<"\t";
		597	+ // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
		598	+ // for(int k = 0; k < gnomeunique.size(); k++)
		599	+ // std::cout<<gnomeunique[k]<<"\t";
		600	+ //}
		601	+ std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f); //copy new gnome without any duplicate band index at current gnome location
		602	+ }
		603	+ }
		604	+
		605	+
		606	+
		607	+ //gpu calling functions
		608	+ //gpu initialization (allocating space for all array on GPU)
		609	+ void gpuInitializationfrommain(float* cpuM, float* cpuCM, std::vector<unsigned int>cpu_nPxInCls, size_t tP, size_t nC) {
		610	+ // call gpuInitialization(......) with all of the necessary parameters
		611	+ gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]);
		612	+
		613	+ }
		614	+
		615	+ //Computation of between class scatter and within class scatter in GPU
		616	+ void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) {
		617	+ //calling function for SW and Sb computation and passing necessary arrays for computation
		618	+ // std::cout<<"gpu function calling"<<std::endl;
		619	+ gpucomputeSbSw(gpuP, P, p, f, gpuSb, cpuSb, gpuSw, cpuSw, gpuF, gpuT, gpuM, gpuCM, nC, tP, props, gen, gnrtn, ub, gpu_nPxInCls, profilefile);
		620	+
		621	+ //display computed Sb and Sw if debug is set
		622	+ if (debug) {
		623	+ std::cout << "From GA-GPU class: gpu results of Sb sn Sw" << std::endl;
		624	+ displayS(cpuSb, f); //display Sb
		625	+ displayS(cpuSw, f); //display Sw
		626	+ std::cout << std::endl;
		627	+ }
		628	+ }
		629	+
		630	+ //call function to free gpu pointers
		631	+ //free all gpu pointers
		632	+ void gpu_Destroy() {
		633	+ gpuDestroy(gpuP, gpuCM, gpuM, gpu_nPxInCls, gpuSb, gpuSw, gpuF, gpuT);
		634	+ }
		635	+
		636	+ //Write a destructor here
		637	+ ~ga_gpu() {
		638	+
		639	+ if (F != NULL) std::free(F); //not sure about this as it is only for 2nd constructor
		640	+ if (T != NULL) std::free(T); //same as above
		641	+ if (P != NULL) std::free(P); //not sure about this as it is only for 2nd constructor
		642	+ if (S != NULL) std::free(S); //same as above
		643	+ //if(i_guess!=NULL) std::free(i_guess); //same as above
		644	+ //HANDLE_ERROR(cudaDeviceReset());
		645	+
		646	+ }
		647	+ };
		648	+
		649	+#endif

src/main.cpp 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/src/main.cpp
		1	+#include <iostream>
		2	+
		3	+//stim libraries
		4	+#include <stim/envi/envi.h>
		5	+#include <stim/image/image.h>
		6	+#include <stim/ui/progressbar.h>
		7	+#include <stim/parser/filename.h>
		8	+#include <stim/parser/table.h>
		9	+#include <stim/parser/arguments.h>
		10	+//input arguments
		11	+stim::arglist args;
		12	+#include <fstream>
		13	+#include <thread>
		14	+#include <random>
		15	+#include <vector>
		16	+#include <math.h>
		17	+#include <limits>
		18	+
		19	+#define NOMINMAX
		20	+
		21	+
		22	+
		23	+//GA
		24	+#include "ga_gpu.h"
		25	+#include "enviload.h"
		26	+
		27	+
		28	+//envi input file and associated parameters
		29	+stim::envi E; //ENVI binary file object
		30	+unsigned int B; //shortcuts storing the spatial and spectral size of the ENVI image
		31	+//mask and class information used for training
		32	+//std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
		33	+std::vector<unsigned int> nP; //array holds the number of pixels in each mask: nP[m] is the number of pixels in mask m
		34	+size_t nC = 0; //number of classes
		35	+size_t tP = 0; //total number of pixels in all masks: tP = nP[0] + nP[1] + ... + nP[nC]
		36	+float* fea;
		37	+
		38	+//ga_gpu class object
		39	+ga_gpu ga;
		40	+bool debug;
		41	+bool binaryClass;
		42	+int binClassOne;
		43	+
		44	+//creating struct to pass to thread functions as it limits number of arguments to 3
		45	+typedef struct {
		46	+ float* S;
		47	+ float* Sb;
		48	+ float* Sw;
		49	+ float* lda;
		50	+}gnome;
		51	+gnome gnom;
		52	+
		53	+
		54	+void gpuComputeEignS( size_t g, size_t fea){
		55	+ //eigen value computation will return r = (nC-1) eigen vectors so new projected data will have dimension of r rather than f
		56	+ // std::thread::id this_id = std::this_thread::get_id();
		57	+ // std::cout<<"thread id is "<< this_id<<std::endl;
		58	+ size_t f = fea;
		59	+ //std::thread::id g = std::this_thread::get_id();
		60	+ float* LeftEigVectors_a = (float) malloc(f f * sizeof(float));
		61	+ float* gSw_a = (float) malloc(f f * sizeof(float)); //copy of between class scatter
		62	+ std::memcpy(gSw_a, &gnom.Sw[g * f * f], f * f *sizeof(float));
		63	+ if(debug){
		64	+ std::cout<<"From Eigen function: Sb and Sw "<<std::endl;
		65	+ displayS(gSw_a, f); //display Sb
		66	+ displayS(&gnom.Sb[g * f * f], f); //display Sw
		67	+ std::cout<<std::endl;
		68	+ }
		69	+
		70	+ std::vector<unsigned int> features = ga.getGnome(g);
		71	+ std::vector<unsigned int> featuresunique;
		72	+ int flag = 0;
		73	+ std::sort(features.begin(), features.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
		74	+ std::unique_copy(features.begin(), features.end(), std::back_inserter(featuresunique));
		75	+ if(featuresunique.size()< features.size()){
		76	+ f = featuresunique.size();
		77	+ }
		78	+
		79	+ size_t r = nC-1; //LDA projected dimension (limited to number of classes - 1 by rank)
		80	+ if(r > f){
		81	+ r = f;
		82	+ }
		83	+
		84	+ int info;
		85	+ float* EigenvaluesI_a = (float)malloc(f sizeof(float));
		86	+ float* Eigenvalues_a = (float)malloc(f sizeof(float));
		87	+ int IPIV = (int) malloc(sizeof(int) * f);
		88	+ //computing inverse of matrix Sw
		89	+ memset(IPIV, 0, f * sizeof(int));
		90	+ LAPACKE_sgetrf(LAPACK_COL_MAJOR, (int)f, (int)f, gSw_a, (int)f, IPIV);
		91	+ // DGETRI computes the inverse of a matrix using the LU factorization computed by DGETRF.
		92	+ LAPACKE_sgetri(LAPACK_COL_MAJOR, (int)f, gSw_a, (int)f, IPIV);
		93	+
		94	+ float* gSbSw_a = (float)calloc(f f, sizeof(float));
		95	+ //mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f * sizeof(float)], f, f, f,f);
		96	+ mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f], f, f, f,f);
		97	+ if(debug){
		98	+ std::cout<<"From Eigen function: inverse of sw and ratio of sb and sw (Sb/Sw)";
		99	+ displayS(gSw_a, f); //display inverse of Sw (1/Sw)
		100	+ displayS(gSbSw_a, f); //display ratio of Sb and Sw (Sb/Sw)
		101	+ }
		102	+
		103	+ //compute left eigenvectors for current gnome from ratio of between class scatter and within class scatter: Sb/Sw
		104	+ info = LAPACKE_sgeev(LAPACK_COL_MAJOR, 'V', 'N', (int)f, gSbSw_a, (int)f, Eigenvalues_a, EigenvaluesI_a, LeftEigVectors_a, (int)f, 0, (int)f);
		105	+ //sort eignevalue indices in descending order
		106	+ size_t* sortedindx = sortIndx(Eigenvalues_a, f);
		107	+ //displayS(LeftEigVectors_a, f); //display Eignevectors (Note these are -1 * matlab eigenvectors does not change fitness score results but keep in mind while projecting data on it)
		108	+ //sorting left eigenvectors (building forward transformation matrix As)
		109	+ for (size_t rowE = 0; rowE < r; rowE++){
		110	+ for (size_t colE = 0; colE < f; colE++){
		111	+ size_t ind1 = g * r * f + rowE * f + colE;
		112	+ //size_t ind1 = rowE * f + colE;
		113	+ size_t ind2 = sortedindx[rowE] * f + colE; //eigenvector as row vector
		114	+ gnom.lda[ind1] = LeftEigVectors_a[ind2];
		115	+ }
		116	+ }
		117	+
		118	+ if(debug){
		119	+ std::cout<<"Eigenvalues are"<<std::endl;
		120	+ for(size_t n = 0 ; n < f; n ++){
		121	+ std::cout << Eigenvalues_a[n] << ", " ;
		122	+ }
		123	+ std::cout<< std::endl;
		124	+ std::cout<<"From Eigen function: Eignevector"<<std::endl;
		125	+
		126	+ std::cout<<"LDA basis is "<<std::endl;
		127	+ std::cout << "r is " << r << std::endl;
		128	+ for(size_t l = 0 ; l < r; l++){
		129	+ for(size_t n = 0 ; n < f; n ++){
		130	+ std::cout << gnom.lda[g * l * f + l * f + n] << ", " ;
		131	+ }
		132	+ std::cout<<std::endl;
		133	+ }
		134	+
		135	+ }
		136	+ //Extract only r eigne vectors as a LDA projection basis
		137	+ float* tempgSb = (float)calloc(r f, sizeof(float));
		138	+ //mtxMul(tempgSb, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sb[g * f * f * sizeof(float)], r, f, f,f);
		139	+ //mtxMul(tempgSb, &lda[g * r * f ], gSb, r, f, f,f);
		140	+ mtxMul(tempgSb, &gnom.lda[g * r * f], &gnom.Sb[g * f * f], r, f, f,f);
		141	+ float* nSb = (float)calloc(r r, sizeof(float));
		142	+ mtxMultranspose(nSb, tempgSb, &gnom.lda[g * r * f], r, f, r, f);
		143	+
		144	+ float* tempgSw = (float)calloc(r f, sizeof(float));
		145	+ //mtxMul(tempgSw, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sw[g * f * f * sizeof(float)], r, f, f,f);
		146	+ mtxMul(tempgSw, &gnom.lda[g * r * f], &gnom.Sw[g * f * f], r, f, f,f);
		147	+ float* nSw = (float)calloc(r r, sizeof(float));
		148	+ mtxMultranspose(nSw, tempgSw, &gnom.lda[g * r * f], r, f, r, f);
		149	+ if(debug){
		150	+ std::cout<<"From Eigen function: projected Sb sn Sw"<<std::endl;
		151	+ displayS(nSb, r); //display Sb
		152	+ displayS(nSw, r); //display Sw
		153	+ std::cout<<std::endl;
		154	+ }
		155	+
		156	+ cv::Mat newSw = cv::Mat((int)r, (int)r, CV_32FC1, nSw); //within scatter of gnome g in the population
		157	+ cv::Mat newSb = cv::Mat((int)r, (int)r, CV_32FC1, nSb); //within scatter of gnome g in the population
		158	+
		159	+ //fisher's ratio from ratio of projected sb and sw
		160	+ float fisherRatio = cv::determinant(newSb) /cv::determinant(newSw);
		161	+ gnom.S[g] = 1/fisherRatio;
		162	+ if (debug) {
		163	+ std::cout<<"Score["<<g<<"]: "<< gnom.S[g]<<std::endl;
		164	+
		165	+ std::cout << "best gnoem is " << std::endl;
		166	+ for (size_t i = 0; i < f; i++)
		167	+ std::cout << ga.P[ga.f * g + i] << ", ";
		168	+ std::cout << std::endl;
		169	+ }
		170	+// if(!isfinite(gnom.S[g])){
		171	+// std::cout<<"-----------------------------------------------"<<std::endl;
		172	+// std::cout<<"Displaying intermediate values of gnome for which score is non finite"<<std::endl;
		173	+// std::cout<<"population "<<std::endl;
		174	+// for(int i = 0; i < ga.f; i++){
		175	+// std::cout<<"\t"<<ga.P[g * ga.f + i];
		176	+// }
		177	+// std::cout<<std::endl;
		178	+// std::cout<<"Sb determinant is "<<cv::determinant(newSb)<<"\t Sw determinant is "<<cv::determinant(newSb)<<std::endl;
		179	+// std::cout<<"fisher ratio is "<<fisherRatio<<std::endl;
		180	+// std::cout<<"Score["<<g<<"]: "<< gnom.S[g]<<std::endl;
		181	+// std::cout<<"------------------------------------------------"<<std::endl;
		182	+//
		183	+// }
		184	+
		185	+
		186	+ if(IPIV!= NULL) std::free(IPIV);
		187	+ if(gSw_a!= NULL) std::free(gSw_a);
		188	+ if(gSbSw_a!= NULL) std::free(gSbSw_a);
		189	+ if(Eigenvalues_a!= NULL) std::free(Eigenvalues_a);
		190	+ if(EigenvaluesI_a!= NULL) std::free(EigenvaluesI_a);
		191	+ if(tempgSb!= NULL) std::free(tempgSb);
		192	+ if(tempgSw!= NULL) std::free(tempgSw);
		193	+
		194	+}
		195	+
		196	+
		197	+void fitnessFunction( float* sb, float* sw, float* lda, float* M, float* cM, size_t f, cudaDeviceProp props, size_t gen, std::ofstream& profilefile){
		198	+
		199	+ size_t tP = 0; //total number of pixels
		200	+ std::for_each(nP.begin(), nP.end(), [&] (size_t n) {
		201	+ tP += n;
		202	+ });
		203	+ size_t nC = nP.size(); //total number of classes
		204	+
		205	+ //--------------Compute between class scatter
		206	+ // ga.cpu_computeSbSw(sb, sw, M, cM, nC, tP, nP, gen);
		207	+ //
		208	+ //if(debug){
		209	+ // std::cout<<"cpu results of Sb sn Sw"<<std::endl;
		210	+ // displayS(sb, ga.f); //display Sb
		211	+ // displayS(sw, ga.f); //display Sw
		212	+ // }
		213	+
		214	+ //ga.callingGpuComputeSbSw(sb, sw, nC, tP, nP, props, gen);
		215	+ ga.gpu_computeSbSw(sb, sw, nC, tP, props, gen, debug, profilefile);
		216	+ //ga.cpu_computeSbSw(sb, sw, M, cM, nC, tP, nP);
		217	+
		218	+ if(debug){
		219	+ std::cout<<"From fitness function: gpu results of Sb sn Sw"<<std::endl;
		220	+ displayS(sb, ga.f); //display Sb
		221	+ displayS(sw, ga.f); //display Sw
		222	+ std::cout<<std::endl;
		223	+ }
		224	+
		225	+ // ----------------------- Linear discriminant Analysis --------------------------------------
		226	+ //timer.start();
		227	+ //structure is created to pass variable to thread function as it accepts only 3 arguments
		228	+ gnom.S = ga.S;
		229	+ gnom.Sw = sw;
		230	+ gnom.Sb = sb;
		231	+ gnom.lda = lda;
		232	+
		233	+ //calling function without using threads
		234	+ for (size_t i = 0; i<ga.p; i++){
		235	+ //calling function for eigencomputation
		236	+ gpuComputeEignS(i, f);
		237	+ //std::cout<<"Score["<<i<<"]: "<< ga.S[i]<<std::endl;
		238	+ }
		239	+
		240	+ //std::vector<std::thread> threads;
		241	+ //for (size_t g = 0; g<ga.p; g++){
		242	+ // //creating thread to do eigen computation
		243	+ // threads.push_back(std::thread(gpuComputeEignS, g, f));
		244	+ //}
		245	+ //
		246	+ //// loop again to join the threads
		247	+ //for (auto& t : threads)
		248	+ // t.join();
		249	+
		250	+ const auto elapsed1 = timer.time_elapsed();
		251	+ if(gen > ga.gnrtn - 2){
		252	+ std::cout << "gpu_eigen time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
		253	+ profilefile<< "gpu_eigen time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
		254	+ }
		255	+ //ga.S = gnom.score;
		256	+ //size_t bestGnomeIdx = ga.sortSIndx()[0];
		257	+
		258	+}//end of fitness function
		259	+
		260	+void binaryclassifier(int classnum){
		261	+ unsigned int* target = (unsigned int*) calloc(tP, sizeof(unsigned int));
		262	+ memcpy(target, ga.T, tP * sizeof(unsigned int));
		263	+ for(int i = 0 ; i < tP; i++){
		264	+ if(target[i]==classnum){
		265	+ ga.T[i] = 1;
		266	+
		267	+ }else
		268	+ ga.T[i] = 0;
		269	+ }
		270	+}
		271	+
		272	+
		273	+
		// Prints the program banner to standard output.
		void advertisement() {
			const char* rule = "=========================================================================";

			std::cout << std::endl;
			std::cout << rule << std::endl;
			std::cout << "Thank you for using the GA-GPU features selection for spectroscopic image!" << std::endl;
			std::cout << rule << std::endl << std::endl;
		}
		281	+
		282	+int main(int argc, char* argv[]){
		283	+
		284	+//Add the argument options and set some of the default parameters
		285	+ args.add("help", "print this help");
		286	+ args.section("Genetic Algorithm");
		287	+ args.add("features", "select features selection algorithm parameters","10", "number of features to be selected");
		288	+ args.add("classes", "image masks used to specify classes", "", "class1.bmp class2.bmp class3.bmp");
		289	+ args.add("population", "total number of feature subsets in puplation matrix", "1000");
		290	+ args.add("generations", "number of generationsr", "50");
		291	+ args.add("initial_guess", "initial guess of featues", "");
		292	+ args.add("debug", "display intermediate data for debugging");
		293	+ args.add("binary", "Select features for binary classes", "");
		294	+ args.add("trim", "this gives wavenumber to use in trim option of siproc which trims all bands from envi file except gagpu selected bands");
		295	+
		296	+ args.parse(argc,argv); //parse the command line arguments
		297	+
		298	+//Print the help text if set
		299	+ if(args["help"].is_set()){ //display the help text if requested
		300	+ advertisement();
		301	+ std::cout<<std::endl<<"usage: ga-gpu input output --option [A B C ...]"<<std::endl;
		302	+ std::cout<<std::endl<<std::endl;
		303	+ std::cout<<args.str()<<std::endl;
		304	+ exit(1);
		305	+ }
		306	+ if (args.nargs() < 2) { //if the user doesn't provide input and output files
		307	+ std::cout << "ERROR: GA-GPU requires an input (ENVI) file and an output (features, text) file." << std::endl;
		308	+ return 1;
		309	+ }
		310	+ if (args["classes"].nargs() < 2) { //if the user doesn't specify at least two class images
		311	+ std::cout << "ERROR: GA-GPU requires at least two class images to be specified using the --classes option" << std::endl;
		312	+ return 1;
		313	+ }
		314	+
		315	+ std::string outfile = args.arg(1); //outfile is text file where bnad index, LDA-basis, wavelength and if --trim option is set then trim wavelengths are set respectively
		316	+ std::string profile_file = "profile_" + outfile ;
		317	+ std::ofstream profilefile(profile_file.c_str(), std::ios::out); //open outfstream for outfile
		318	+
		319	+ time_t t_start = time(NULL); //start a timer for file reading
		320	+ E.open(args.arg(0), std::string(args.arg(0)) + ".hdr"); //open header file
		321	+ size_t X = E.header.samples; //total number of pixels in X dimension
		322	+ size_t Y = E.header.lines; //total number of pixels in Y dimension
		323	+ B = (unsigned int)E.header.bands; //total number of bands (features)
		324	+ std::vector<double> wavelengths = E.header.wavelength; //wavelengths of each band
		325	+
		326	+ if(E.header.interleave != stim::envi_header::BIP){ //this code can only load bip files and hence check that in header file
		327	+ std::cout<<"this code works for only bip files. please convert file to bip file"<<std::endl;
		328	+ exit(1); //if file is not bip file exit code execution
		329	+ }
		330	+
		331	+///--------------------------Load features---------------------------------------------
		332	+ nP = ga_load_class_images(argc, args, &nC, &tP); //load supervised class images
		333	+ ga.F = load_features( nC, tP, B, E, nP); //generate the feature matrix
		334	+ ga.T = ga_load_responses(tP, nC, nP); //load the responses for RF training
		335	+ E.close(); //close the hyperspectral file
		336	+ time_t t_end = time(NULL);
		337	+ std::cout<<"Total time: "<<t_end - t_start<<" s"<<std::endl;
		338	+ //display_PixelfeatureNclass(ga.F, ga.T , B , 1); //Print one value from feature matrix to debug feature loading
		339	+ //std::cout << "pixel target is " << ga.T[0] << " " << ga.T[1] << " " << ga.T[tP - 2] << " " << ga.T[tP - 1]<<std::endl;
		340	+///--------------------------Genetic algorith configurations with defult paramets and from argument values---------------------
		341	+ ga.f = args["features"].as_int(0); //number of features to be selected by user default value is 10
		342	+ ga.p = args["population"].as_int(0); //population size to be selected by user default value is 1000
		343	+ ga.gnrtn = args["generations"].as_int(0); //number of generations to be selected by user default value is 50
		344	+ if(args["binary"]) { //set this option when features are to be selected as binary clas features (class vs stroma)
		345	+ binClassOne = args["binary"].as_int(0); //sel class number here, if 2 then features are selected for (class-2 vs stroma)
		346	+ //feture selection for class selected by user with user arguments (make it binary class data by making chosen class label as 1 and al other class labels 0 from multiclass data )
		347	+ //to select feature for all classes in joint class data using binary class system need to write a script with loop covering all classes
		348	+ binaryclassifier(binClassOne);
		349	+ } ///not fully implemented yet
		350	+
		351	+ ga.ub = B; //upper bound is number of bands (i.e. size of z dimension) Note: for this particular application and way code is written lower bound is 0 and upper bound is size of z dimension
		352	+ ga.uniformRate = 0.5; //uniform rate is used in crossover
		353	+ ga.mutationRate = 0.5f; //in percentage for mutation operation on gnome
		354	+ ga.tournamentSize = 5; //for crossover best parents are selected from tournament of gnomes
		355	+ ga.elitism = true; // if it is true then best gnome of current generation is passed to next generation
		356	+ //initial guess of population
		357	+ ga.i_guess = (unsigned int*) calloc(ga.f, sizeof(unsigned int));
		358	+ debug = args["debug"];
		359	+
		360	+//==================Generate intial population =================================================
		361	+ std::vector<unsigned int> i_guess(ga.f);
		362	+ for (size_t b = 0; b < ga.f; b++) //generate default initial guess
		363	+ i_guess[b] = rand() % B + 0;
		364	+
		365	+ if (args["initial_guess"].is_set()) {//if the user specifies the --initialguess option & provides feature indices as initial guess
		366	+ size_t nf = args["initial_guess"].nargs(); //get the number of arguments after initial_guess
		367	+ if (nf == 1 \|\| nf == ga.f) { //check if file with initial guessed indices is given or direct indices are given as argument
		368	+ if (nf == 1) { //if initial guessed feature indices are given in file
		369	+ std::ifstream in; //open the file containing the baseline points
		370	+ in.open(args["initial_guess"].as_string().c_str());
		371	+ if (in.is_open()){ //if file is present and can be opened then read it
		372	+ unsigned int b_ind;
		373	+ while (in >> b_ind) //get each band index and push it into the vector
		374	+ i_guess.push_back(b_ind);
		375	+ }
		376	+ else
		377	+ std::cout << "cannot open file of initial_guess indices" << std::endl;
		378	+ }
		379	+ else if (nf == ga.f) { //if direct indices are given as argument
		380	+ for (size_t b = 0; b < nf; b++) //for each band given by the user
		381	+ i_guess[b] = args["initial_guess"].as_int(b); //store that band in the i_guess array
		382	+ }
		383	+ }
		384	+ }
		385	+
		386	+ ga.initialize_population(i_guess, debug); //initialize first population set for first generation, user can pass manually preferred features from command line
		387	+ //display_gnome(0);
		388	+
		389	+//------------------Calculate class means and total mean of features----------------------------
		390	+ float* M = (float*)calloc( B , sizeof(float)); //total mean of entire feature martix for all features (bands B)
		391	+ ga.ttlMean(M, tP, B); //calculate total mean, ga.F is entire feature matrix, M is mean for all bands B(features)
		392	+ if(debug) ga.dispalymean(M); //if option --debug is used display all bands mean
		393	+
		394	+ //display band index of bands with mean zero, this indicates that band has all zero values
		395	+ std::cout<<"Display features indices with zero mean "<<std::endl;
		396	+ for(unsigned int i = 0; i < B; i++){
		397	+ if(M[i]== 0)
		398	+ std::cout<<"\t"<<i;
		399	+ }
		400	+ std::cout<<std::endl;
		401	+// std::cout << "pixel target is " << ga.T[0] << " " << ga.T[1] << " " << ga.T[tP - 2] << " " << ga.T[tP - 1]<<std::endl;
		402	+ float* cM = (float)calloc(nC B , sizeof(float)); //cM is nC X B matrix with each row as mean of all samples in one class for all features (bands B)
		403	+ ga.classMean(cM, tP, nC, B, nP); //calculate class mean, ga.F is entire feature matrix, M is mean for all bands B(features)
		404	+ if(debug) ga.dispalyClassmean(cM, nC);
		405	+
		406	+//------------------------------------GPU init----------------------------------------------------
		407	+ //checking for cuda device
		408	+ int count;
		409	+ HANDLE_ERROR(cudaGetDeviceCount(&count));
		410	+ if(count < 1){
		411	+ std::cout<<"no cuda device is available"<<std::endl;
		412	+ return 1;
		413	+ }
		414	+ cudaDeviceProp props;
		415	+ HANDLE_ERROR(cudaGetDeviceProperties(&props, 0));
		416	+ ga.gpuInitializationfrommain(M, cM, nP, tP, nC);
		417	+
		418	+//feture selection for class selected by user with user arguments (make it binary class data by making chosen class label as 1 and al other class labels 0 from multiclass data )
		419	+//to select feature for all classes in joint class data using binary class system need to write a script with loop covering all classes
		420	+ //if(binaryClass){
		421	+ // binaryclassifier(binClassOne);
		422	+ //}
		423	+
		424	+//============================= GA evolution by generations ====================================================
		425	+ std::vector<unsigned int> bestgnome; //holds best gnome after each generation evaluation
		426	+ size_t bestG_Indx; //This gives index of best gnome in the current population to get best gnome and its fitness value
		427	+ unsigned int* newPop = (unsigned int) calloc(ga.p ga.f, sizeof(unsigned int)); //temprory storage of new population
		428	+ double* best_S = (double*) calloc (ga.gnrtn, sizeof(double)); //stores fitness value of best gnome at each iteration
		429	+ float* lda = (float) calloc (ga.p (nC-1) * ga.f, sizeof(float)); //stores LDA basis for each gnome so that we can have best gnome's LDA basis
		430	+ float* sb = (float) calloc( ga.p ga.f * ga.f , sizeof(float)) ; //3d matrix for between class scatter (each 2d matrix between class scatter for one gnome)
		431	+ float* sw = (float) calloc( ga.p ga.f * ga.f , sizeof(float)) ; //3d matrix for within class scatter (each 2d matrix within class scatter for one gnome)
		432	+ ga.zerobandcheck(M, true); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean
		433	+ ga.zerobandcheck(M, true); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results
		434	+ time_t gpu_timestart = time(NULL); //start a timer for total evoluation
		435	+
		436	+ for (size_t gen = 0; gen < ga.gnrtn; gen++){ //for each generation find fitness value of all gnomes in population matrix and generate population for next generation
		437	+ //std::cout<<"Generation: "<<gen<<std::endl;
		438	+ fitnessFunction(sb, sw, lda, M , cM, ga.f, props, gen, profilefile); //Evaluate phe(feature matrix for current population) for fitness of all gnomes in current population
		439	+ timer.start(); //start timer for new population generation
		440	+ bestG_Indx = ga.evolvePopulation(newPop, M, debug); //evolve population to generate new generation population
		441	+ const auto pop_generation = timer.time_elapsed(); // end timer for new population generation
		442	+ if(gen >ga.gnrtn -2){
		443	+ std::cout << "population evolution time "<<std::chrono::duration_cast<std::chrono::microseconds>(pop_generation).count() << "us" << std::endl;
		444	+ profilefile<<"population evolution time "<<std::chrono::duration_cast<std::chrono::microseconds>(pop_generation).count() << "us" <<std::endl;
		445	+ }
		446	+
		447	+ best_S[gen] = ga.S[bestG_Indx]; //score of best gnome in current generation
		448	+ bestgnome = ga.getGnome(bestG_Indx); //Best gnome of current populaation
		449	+ ga.generateNewP(newPop); //replace current population with new populaiton in the ga classs object
		450	+ ga.zerobandcheck(M, false); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean
		451	+ ga.zerobandcheck(M, false); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results
		452	+ }//end generation
		453	+
		454	+ time_t gpu_timeend = time(NULL); //end a timer for total evoluation
		455	+ std::cout<<"Total gpu time: "<<gpu_timeend - gpu_timestart<<" s"<<std::endl;
		456	+ profilefile<<"Total gpu time: "<<gpu_timeend - gpu_timestart<<" s"<<std::endl;
		457	+
		458	+//================================ Results of GA ===============================================================
		459	+ std::cout<<"best gnome's fitness value is "<<best_S[ga.gnrtn-1]<<std::endl;
		460	+ std::cout<<"best gnome is: ";
		461	+ for(size_t i = 0; i < ga.f; i++){
		462	+ std::cout<<" "<<(bestgnome.at(i));
		463	+ }
		464	+ std::cout<<std::endl;
		465	+
		466	+ //create a text file to store the LDA stats (features subset and LDA-basis)
		467	+ ////format of CSV file is: 1st row - band index, 2nd LDA basis depending on number of classes, 3rd - wavenumber corresponding to band index and it --trim is selected then trim wavnumbersare also given
		468	+ std::ofstream csv(outfile.c_str(), std::ios::out); //open outfstream for outfile
		469	+ size_t ldaindx = bestG_Indx * (nC-1) * ga.f ; //Compute LDA basis index of best gnome
		470	+
		471	+ //if(binaryClass){ //this option is for binary class feature selection from joint classes but not fully implemented
		472	+ // csv<<binClassOne<<std::endl;
		473	+ //}
		474	+
		475	+ //fitness values of best gnome is
		476	+ csv<<"best gnome's fitness value is "<<best_S[ga.gnrtn-1]<<std::endl; //output fitness value of best gnome in last generation
		477	+ //output gnome i.e. band index of selected featurs
		478	+ csv<<(bestgnome.at(0)); //output feature subset
		479	+ for(size_t i = 1; i < ga.f; i++)
		480	+ csv<<","<<(bestgnome.at(i));
		481	+ csv<<std::endl;
		482	+
		483	+ //output LDA basis of size r X f, r is nC - 1 as LDA projection is rank limited by number of classes - 1
		484	+ for (size_t i = 0; i < nC-1; i++){
		485	+ csv<<lda[ldaindx + i * ga.f ];
		486	+ for (size_t j = 1; j < ga.f; j++){
		487	+ csv<<","<<lda[ldaindx + i * ga.f +j];
		488	+ }
		489	+ csv << std::endl;
		490	+ }
		491	+ //output actual wavelenths corresponding to those band indices
		492	+ csv << (wavelengths[bestgnome.at(0)]);
		493	+ for (size_t i = 1; i < ga.f; i++)
		494	+ csv << "," << (wavelengths[bestgnome.at(i)]);
		495	+ csv << std::endl;
		496	+
		497	+
		498	+ if (args["trim"].is_set()) {
		499	+ csv << "trim info" << std::endl;
		500	+ std::sort(bestgnome.begin(), bestgnome.end()); //sort features index in best gnome
		501	+
		502	+ std::vector<unsigned int> trimindex(ga.f); //create a vector to store temprory trim index bounds
		503	+ std::vector<unsigned int> finaltrim_ind; //create a vector to store final trim index bounds
		504	+ std::vector<unsigned int> trim_wv; //create a vector to store final trim wavelength bounds
		505	+
		506	+ //trim index
		507	+ trimindex.push_back(1); //1st trimming band index is 1
		508	+ for (size_t i = 0; i < ga.f; i++) { // for each feature find its bound indexes
		509	+ trimindex[i * 2] = bestgnome.at(i) - 1;
		510	+ trimindex[i * 2 + 1] = bestgnome.at(i) + 1;
		511	+ }
		512	+ trimindex.push_back(B); //last bound index is B
		513	+
		514	+ //organize trim index
		515	+ int k = 0;
		516	+ for (size_t i = 0; i < ga.f + 1; i++) { // find valid pair of trim indices bound excluding adjacent trim indices
		517	+ if (trimindex[2 * i] < trimindex[2 * i + 1]) {
		518	+ finaltrim_ind.push_back(trimindex[2 * i]); //this is left bound
		519	+ finaltrim_ind.push_back(trimindex[2 * i + 1]);
		520	+ k = k + 2;
		521	+ }
		522	+ }
		523	+ //add duplicated trim indices as single index to final trim index
		524	+ for (size_t i = 0; i < ga.f + 1; i++) { //check each pair of trim indices for duplications
		525	+ if (trimindex[2 * i] == trimindex[2 * i + 1]) { // (duplication caused due to adjacent features)
		526	+ finaltrim_ind.push_back(trimindex[2 * i]); // remove duplicated trim indices replace by one
		527	+ k = k + 1;
		528	+ }
		529	+ }
		530	+
		531	+
		532	+ ////output actual wavelenths corresponding to those trim indices
		533	+ ////these wavenumber are grouped in pairs, check each pair, if duplicated numbers are there in pair delete one and keep other as band to trim, if 2nd wavnumber is smaller than 1st in a pair ignore that pair
		534	+ ////e.g [1, 228, 230, 230, 232, 350,352, 351, 353, 1200] pairas [1(start)-228,230-230, 232-350, 352-351, 353-1200(end)], trimming wavenumbers are [1-228, 230, 233-350, 353-1200]
		535	+ csv << (wavelengths[finaltrim_ind.at(0)]);
		536	+ for (size_t i = 1; i < ga.f * + 2 ; i++)
		537	+ csv << "," << (wavelengths[finaltrim_ind.at(i)]);
		538	+ csv << std::endl;
		539	+ } //end trim option
		540	+
		541	+
		542	+ //free gpu pointers
		543	+ ga.gpu_Destroy();
		544	+
		545	+ //free pointers
		546	+ std::free(sb);
		547	+ std::free(sw);
		548	+ std::free(M);
		549	+ std::free(cM);
		550	+ std::free(best_S);
		551	+ std::free(lda);
		552	+ std::free(newPop);
		553	+
		554	+}//end main
		555	+
		556	+

src/timer.h 0 → 100644

Show/Hide comments View file @0d84034

		1	+++ a/src/timer.h
		1	+#ifndef GA_GPU_TIMER_H
		2	+#define GA_GPU_TIMER_H
		3	+
		4	+#include <chrono>
		5	+//#include <thread>
		6	+//#include <iostream>
		7	+
		8	+//using namespace std::chrono;
		9	+
		// Minimal stopwatch on top of std::chrono::high_resolution_clock.
		// start() stores the current time; time_elapsed() returns the duration between
		// that stored time and the moment of the call.
		class Timer {
			using Clock = std::chrono::high_resolution_clock;

		public:
			// Stores the current time as the reference point.
			void start(){
				reference_ = Clock::now();
			}

			// Returns the time passed since the stored reference point.
			Clock::duration time_elapsed() const{
				const Clock::time_point now = Clock::now();
				return now - reference_;
			}

		private:
			Clock::time_point reference_;	//set by start()
		};
		22	+
		23	+#endif
		24	+
		25	+
		26	+//class Timer {
		27	+// std::chrono::time_point<std::chrono::high_resolution_clock> epoch;
		28	+//
		29	+// public:
		30	+// typedef high_resolution_clock Clock;
		31	+// void start(){
		32	+// epoch = Clock::now();
		33	+// }
		34	+// std::chrono::time_point<std::chrono::high_resolution_clock> time_elapsed() const {
		35	+// return Clock::now() - epoch;
		36	+// }
		37	+//};
0	\ No newline at end of file	38	\ No newline at end of file