Commit 0d84034253937b68843ec545a0050dd6abaccf5c

Authored by David Mayerich
0 parents

public release commit

CMakeLists.txt 0 → 100644
  1 +++ a/CMakeLists.txt
  1 +#Specify the version being used aswell as the language
  2 +cmake_minimum_required(VERSION 2.8)
  3 +
  4 +#Name your project here
  5 +project(ga-gpu)
  6 +
  7 +#set the module directory
  8 +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}")
  9 +
  10 +#default to release mode
  11 +if(NOT CMAKE_BUILD_TYPE)
  12 + set(CMAKE_BUILD_TYPE Release)
  13 +endif(NOT CMAKE_BUILD_TYPE)
  14 +
  15 +#build the executable in the binary directory on MS Visual Studio
  16 +if ( MSVC )
  17 + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  18 + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
  19 +endif ( MSVC )
  20 +#MAYBE REMOVE-----------------
  21 +#set C++11 flags if using GCC
  22 +if( CMAKE_COMPILER_IS_GNUCC )
  23 +# SET( CMAKE_CXX_FLAGS "-std=c++11")
  24 + set(CMAKE_CXX_FLAGS "-std=c++11 -D_FORCE_INLINES")
  25 +# SET( CUDA_NVCC_FLAGS "-std=c++11")
  26 +endif( CMAKE_COMPILER_IS_GNUCC )
  27 +
  28 +SET( CUDA_NVCC_FLAGS "--gpu-architecture=compute_50 --gpu-code=sm_50,compute_50")
  29 +#-----------------------------
  30 +
  31 +
  32 +
  33 +#find packages-----------------------------------
  34 +#find OpenCV
  35 +find_package(OpenCV REQUIRED)
  36 +add_definitions(-DUSING_OPENCV)
  37 +
  38 +#find the pthreads package
  39 +find_package(Threads)
  40 +
  41 +#find the X11 package
  42 +find_package(X11)
  43 +
  44 +#find the STIM library
  45 +find_package(STIM)
  46 +
  47 +#find CUDA, mostly for LA stuff using cuBLAS
  48 +find_package(CUDA REQUIRED)
  49 +
  50 +#find Boost for Unix-based file lists
  51 +if( CMAKE_COMPILER_IS_GNUCC )
  52 + find_package(Boost COMPONENTS filesystem system)
  53 + if(Boost_FOUND)
  54 + include_directories(${Boost_INCLUDE_DIR})
  55 + else()
  56 + message(FATAL_ERROR "HSIproc requires Boost::filesystem and Boost::system when using GCC")
  57 + endif()
  58 +endif()
  59 +
  60 +#find FANN
  61 +#find_package(FANN REQUIRED)
  62 +
  63 +#find the GLUT library for visualization
  64 +#find_package(OpenGL REQUIRED)
  65 +#find_package(GLUT REQUIRED)
  66 +#if(WIN32)
  67 +# find_package(GLEW REQUIRED)
  68 +# include_directories(${GLEW_INCLUDE_DIR})
  69 +#endif(WIN32)
  70 +
  71 +#find LAPACK and supporting link_libraries
  72 +find_package(LAPACKE REQUIRED)
  73 +
  74 +#include include directories
  75 +include_directories(${CUDA_INCLUDE_DIRS}
  76 + ${OpenCV_INCLUDE_DIRS}
  77 + ${LAPACKE_INCLUDE_DIR}
  78 + ${STIM_INCLUDE_DIRS}
  79 + ${OpenGL_INCLUDE_DIRS}
  80 +# ${GLUT_INCLUDE_DIR}
  81 + ${FANN_INCLUDE_DIRS}
  82 + "${CMAKE_SOURCE_DIR}/src"
  83 +)
  84 +
  85 +#Assign a variable for all of the header files in this project
  86 +include_directories("${CMAKE_SOURCE_DIR}/src")
  87 +#file(GLOB GACPU_H "${CMAKE_SOURCE_DIR}/src/gacpu/*.h")
  88 +file(GLOB GAGPU_H "${CMAKE_SOURCE_DIR}/src/*.h")
  89 +#file(GLOB GA_H "${CMAKE_SOURCE_DIR}/src/*.h")
  90 +
  91 +#Assign source files to the appropriate variables to easily associate them with executables
  92 +#file(GLOB GA_CPU_SRC "${CMAKE_SOURCE_DIR}/src/gacpu/*.cpp")
  93 +file(GLOB GA_GPU_SRC "${CMAKE_SOURCE_DIR}/src/*.c*")
  94 +
  95 +
  96 +#create an executable file
  97 +cuda_add_executable(ga-gpu
  98 + ${GAGPU_H}
  99 +# ${GA_H}
  100 + ${GA_GPU_SRC}
  101 +)
  102 +target_link_libraries(ga-gpu ${CUDA_LIBRARIES}
  103 + ${CUDA_CUBLAS_LIBRARIES}
  104 + ${CUDA_CUFFT_LIBRARIES}
  105 + ${LAPACKE_LIBRARIES}
  106 + ${LAPACK_LIBRARIES}
  107 + ${BLAS_LIBRARIES}
  108 + ${CMAKE_THREAD_LIBS_INIT}
  109 + ${X11_LIBRARIES}
  110 + ${OpenCV_LIBS}
  111 +)
  112 +
  113 +
  114 +#create the PROC executable----------------------------------------------
  115 +
  116 +#create an executable file
  117 +#add_executable(hsiga
  118 +# ${GACPU_H}
  119 +# ${GA_H}
  120 +# ${GA_CPU_SRC}
  121 +#)
  122 +#target_link_libraries(hsiga ${LAPACKE_LIBRARIES}
  123 +# ${LAPACK_LIBRARIES}
  124 +# ${BLAS_LIBRARIES}
  125 +# ${CMAKE_THREAD_LIBS_INIT}
  126 +# ${X11_LIBRARIES}
  127 +# ${OpenCV_LIBS}
  128 +#)
  129 +
  130 +
  131 +
  132 +#if Boost is found, set an environment variable to use with preprocessor directives
  133 +if(Boost_FILESYSTEM_FOUND)
  134 +# if(BUILD_GACPU)
  135 +# target_link_libraries(hsiga ${Boost_FILESYSTEM_LIBRARIES}
  136 +# ${Boost_SYSTEM_LIBRARY}
  137 +# )
  138 + #message(${Boost_FILESYSTEM_LIBRARIES})
  139 +# endif(BUILD_GACPU)
  140 +# if(BUILD_GAGPU)
  141 + target_link_libraries(ga-gpu ${Boost_FILESYSTEM_LIBRARIES}
  142 + ${Boost_SYSTEM_LIBRARY}
  143 + )
  144 +# endif(BUILD_GAGPU)
  145 +endif(Boost_FILESYSTEM_FOUND)
FindGLEW.cmake 0 → 100644
  1 +++ a/FindGLEW.cmake
  1 +# Copyright (c) 2012-2016 DreamWorks Animation LLC
  2 +#
  3 +# All rights reserved. This software is distributed under the
  4 +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
  5 +#
  6 +# Redistributions of source code must retain the above copyright
  7 +# and license notice and the following restrictions and disclaimer.
  8 +#
  9 +# * Neither the name of DreamWorks Animation nor the names of
  10 +# its contributors may be used to endorse or promote products derived
  11 +# from this software without specific prior written permission.
  12 +#
  13 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  14 +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  15 +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  16 +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  17 +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL,
  18 +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  19 +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20 +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21 +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22 +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  23 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24 +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE
  25 +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00.
  26 +#
  27 +
  28 +#-*-cmake-*-
  29 +# - Find GLEW
  30 +#
  31 +# Author : Nicholas Yue yue.nicholas@gmail.com
  32 +#
  33 +# This auxiliary CMake file helps in find the GLEW headers and libraries
  34 +#
  35 +# GLEW_FOUND set if Glew is found.
  36 +# GLEW_INCLUDE_DIR GLEW's include directory
  37 +# GLEW_glew_LIBRARY GLEW libraries
  38 +# GLEW_glewmx_LIBRARY GLEWmx libraries (Mulitple Rendering Context)
  39 +
  40 +FIND_PACKAGE ( PackageHandleStandardArgs )
  41 +
  42 +FIND_PATH( GLEW_LOCATION include/GL/glew.h
  43 + "$ENV{GLEW_ROOT}"
  44 + NO_DEFAULT_PATH
  45 + NO_SYSTEM_ENVIRONMENT_PATH
  46 + )
  47 +
  48 +FIND_PACKAGE_HANDLE_STANDARD_ARGS ( GLEW
  49 + REQUIRED_VARS GLEW_LOCATION
  50 + )
  51 +
  52 +IF ( GLEW_LOCATION )
  53 +
  54 + SET( GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path")
  55 +
  56 + SET ( ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
  57 + IF (GLEW_USE_STATIC_LIBS)
  58 + IF (APPLE)
  59 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
  60 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
  61 + NO_DEFAULT_PATH
  62 + NO_SYSTEM_ENVIRONMENT_PATH
  63 + )
  64 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
  65 + NO_DEFAULT_PATH
  66 + NO_SYSTEM_ENVIRONMENT_PATH
  67 + )
  68 + # MESSAGE ( "APPLE STATIC" )
  69 + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
  70 + ELSEIF (WIN32)
  71 + # Link library
  72 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
  73 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib )
  74 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib )
  75 + ELSE (APPLE)
  76 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
  77 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
  78 + NO_DEFAULT_PATH
  79 + NO_SYSTEM_ENVIRONMENT_PATH
  80 + )
  81 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
  82 + NO_DEFAULT_PATH
  83 + NO_SYSTEM_ENVIRONMENT_PATH
  84 + )
  85 + # MESSAGE ( "LINUX STATIC" )
  86 + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} )
  87 + ENDIF (APPLE)
  88 + ELSE ()
  89 + IF (APPLE)
  90 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
  91 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib )
  92 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib )
  93 + ELSEIF (WIN32)
  94 + # Link library
  95 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
  96 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib )
  97 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib )
  98 + # Load library
  99 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
  100 + FIND_LIBRARY ( GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin
  101 + NO_DEFAULT_PATH
  102 + NO_SYSTEM_ENVIRONMENT_PATH
  103 + )
  104 + FIND_LIBRARY ( GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin
  105 + NO_DEFAULT_PATH
  106 + NO_SYSTEM_ENVIRONMENT_PATH
  107 + )
  108 + ELSE (APPLE)
  109 + # Unices
  110 + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
  111 + NO_DEFAULT_PATH
  112 + NO_SYSTEM_ENVIRONMENT_PATH
  113 + )
  114 + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
  115 + NO_DEFAULT_PATH
  116 + NO_SYSTEM_ENVIRONMENT_PATH
  117 + )
  118 + ENDIF (APPLE)
  119 + ENDIF ()
  120 + # MUST reset
  121 + SET(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})
  122 +
  123 + SET( GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library")
  124 + SET( GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library")
  125 +
  126 +ENDIF ()
FindGLUT.cmake 0 → 100644
  1 +++ a/FindGLUT.cmake
  1 +#.rst:
  2 +# FindGLUT
  3 +# --------
  4 +#
  5 +# try to find glut library and include files.
  6 +#
  7 +# IMPORTED Targets
  8 +# ^^^^^^^^^^^^^^^^
  9 +#
  10 +# This module defines the :prop_tgt:`IMPORTED` targets:
  11 +#
  12 +# ``GLUT::GLUT``
  13 +# Defined if the system has GLUT.
  14 +#
  15 +# Result Variables
  16 +# ^^^^^^^^^^^^^^^^
  17 +#
  18 +# This module sets the following variables:
  19 +#
  20 +# ::
  21 +#
  22 +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc.
  23 +# GLUT_LIBRARIES, the libraries to link against
  24 +# GLUT_FOUND, If false, do not try to use GLUT.
  25 +#
  26 +# Also defined, but not for general use are:
  27 +#
  28 +# ::
  29 +#
  30 +# GLUT_glut_LIBRARY = the full path to the glut library.
  31 +# GLUT_Xmu_LIBRARY = the full path to the Xmu library.
  32 +# GLUT_Xi_LIBRARY = the full path to the Xi Library.
  33 +
  34 +#=============================================================================
  35 +# Copyright 2001-2009 Kitware, Inc.
  36 +#
  37 +# Distributed under the OSI-approved BSD License (the "License");
  38 +# see accompanying file Copyright.txt for details.
  39 +#
  40 +# This software is distributed WITHOUT ANY WARRANTY; without even the
  41 +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  42 +# See the License for more information.
  43 +#=============================================================================
  44 +# (To distribute this file outside of CMake, substitute the full
  45 +# License text for the above reference.)
  46 +
  47 +if (WIN32)
  48 + find_path( GLUT_INCLUDE_DIR NAMES GL/glut.h
  49 + PATHS $ENV{GLUT_ROOT_PATH}/include )
  50 +
  51 + if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  52 + find_library( GLUT_glut_LIBRARY NAMES freeglut
  53 + PATHS
  54 + $ENV{GLUT_ROOT_PATH}/lib/x64
  55 +
  56 + NO_DEFAULT_PATH
  57 + )
  58 + else( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  59 + find_library( GLUT_glut_LIBRARY NAMES glut glut32 freeglut
  60 + PATHS
  61 + ${OPENGL_LIBRARY_DIR}
  62 + $ENV{GLUT_ROOT_PATH}/lib
  63 + )
  64 + endif( CMAKE_SIZEOF_VOID_P EQUAL 8 )
  65 +
  66 +else ()
  67 +
  68 + if (APPLE)
  69 + find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
  70 + find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
  71 + find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")
  72 +
  73 + if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
  74 + add_library(GLUT::Cocoa UNKNOWN IMPORTED)
  75 + # Cocoa should always be a Framework, but we check to make sure.
  76 + if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
  77 + set_target_properties(GLUT::Cocoa PROPERTIES
  78 + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
  79 + else()
  80 + set_target_properties(GLUT::Cocoa PROPERTIES
  81 + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}")
  82 + endif()
  83 + endif()
  84 + else ()
  85 +
  86 + if (BEOS)
  87 +
  88 + set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
  89 + set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
  90 +
  91 + else()
  92 +
  93 + find_library( GLUT_Xi_LIBRARY Xi
  94 + /usr/openwin/lib
  95 + )
  96 +
  97 + find_library( GLUT_Xmu_LIBRARY Xmu
  98 + /usr/openwin/lib
  99 + )
  100 +
  101 + if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi)
  102 + add_library(GLUT::Xi UNKNOWN IMPORTED)
  103 + set_target_properties(GLUT::Xi PROPERTIES
  104 + IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}")
  105 + endif()
  106 +
  107 + if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu)
  108 + add_library(GLUT::Xmu UNKNOWN IMPORTED)
  109 + set_target_properties(GLUT::Xmu PROPERTIES
  110 + IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}")
  111 + endif()
  112 +
  113 + endif ()
  114 +
  115 + find_path( GLUT_INCLUDE_DIR GL/glut.h
  116 + /usr/include/GL
  117 + /usr/openwin/share/include
  118 + /usr/openwin/include
  119 + /opt/graphics/OpenGL/include
  120 + /opt/graphics/OpenGL/contrib/libglut
  121 + ${_GLUT_INC_DIR}
  122 + )
  123 +
  124 + find_library( GLUT_glut_LIBRARY glut
  125 + /usr/openwin/lib
  126 + ${_GLUT_glut_LIB_DIR}
  127 + )
  128 +
  129 + unset(_GLUT_INC_DIR)
  130 + unset(_GLUT_glut_LIB_DIR)
  131 +
  132 + endif ()
  133 +
  134 +endif ()
  135 +
  136 +FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)
  137 +
  138 +if (GLUT_FOUND)
  139 + # Is -lXi and -lXmu required on all platforms that have it?
  140 + # If not, we need some way to figure out what platform we are on.
  141 + set( GLUT_LIBRARIES
  142 + ${GLUT_glut_LIBRARY}
  143 + ${GLUT_Xmu_LIBRARY}
  144 + ${GLUT_Xi_LIBRARY}
  145 + ${GLUT_cocoa_LIBRARY}
  146 + )
  147 +
  148 + if(NOT TARGET GLUT::GLUT)
  149 + add_library(GLUT::GLUT UNKNOWN IMPORTED)
  150 + set_target_properties(GLUT::GLUT PROPERTIES
  151 + INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")
  152 + if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$")
  153 + set_target_properties(GLUT::GLUT PROPERTIES
  154 + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
  155 + else()
  156 + set_target_properties(GLUT::GLUT PROPERTIES
  157 + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
  158 + endif()
  159 +
  160 + if(TARGET GLUT::Xmu)
  161 + set_property(TARGET GLUT::GLUT APPEND
  162 + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu)
  163 + endif()
  164 +
  165 + if(TARGET GLUT::Xi)
  166 + set_property(TARGET GLUT::GLUT APPEND
  167 + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi)
  168 + endif()
  169 +
  170 + if(TARGET GLUT::Cocoa)
  171 + set_property(TARGET GLUT::GLUT APPEND
  172 + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa)
  173 + endif()
  174 + endif()
  175 +
  176 + #The following deprecated settings are for backwards compatibility with CMake1.4
  177 + set (GLUT_LIBRARY ${GLUT_LIBRARIES})
  178 + set (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
  179 +endif()
  180 +
  181 +mark_as_advanced(
  182 + GLUT_INCLUDE_DIR
  183 + GLUT_glut_LIBRARY
  184 + GLUT_Xmu_LIBRARY
  185 + GLUT_Xi_LIBRARY
  186 + )
FindLAPACKE.cmake 0 → 100644
  1 +++ a/FindLAPACKE.cmake
  1 +# - Try to find LAPACKE
  2 +#
  3 +# Once done this will define
  4 +# LAPACKE_FOUND - System has LAPACKE
  5 +# LAPACKE_INCLUDE_DIRS - The LAPACKE include directories
  6 +# LAPACKE_LIBRARIES - The libraries needed to use LAPACKE
  7 +# LAPACKE_DEFINITIONS - Compiler switches required for using LAPACKE
  8 +#
  9 +# Usually, LAPACKE requires LAPACK and the BLAS. This module does
  10 +# not enforce anything about that.
  11 +
  12 +find_path(LAPACKE_INCLUDE_DIR
  13 + NAMES lapacke.h
  14 + PATHS $ENV{LAPACK_PATH} ${INCLUDE_INSTALL_DIR}
  15 + PATHS ENV INCLUDE)
  16 +
  17 +find_library(LAPACKE_LIBRARY liblapacke lapacke
  18 + PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  19 + PATHS ENV LIBRARY_PATH
  20 + PATHS ENV LD_LIBRARY_PATH)
  21 +
  22 +if(MSVC)
  23 + find_library(LAPACK_LIBRARY liblapack lapack
  24 + PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  25 + PATHS ENV LIBRARY_PATH
  26 + PATHS ENV LD_LIBRARY_PATH)
  27 +
  28 + find_library(BLAS_LIBRARY libblas blas
  29 + PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  30 + PATHS ENV LIBRARY_PATH
  31 + PATHS ENV LD_LIBRARY_PATH)
  32 +
  33 +else()
  34 + find_library(LAPACK REQUIRED)
  35 + find_library(BLAS REQUIRED)
  36 +endif()
  37 +set(LAPACKE_LIBRARIES ${LAPACKE_LIBRARY} ${LAPACK_LIBRARY} ${BLAS_LIBRARY})
  38 +
  39 +include(FindPackageHandleStandardArgs)
  40 +find_package_handle_standard_args(LAPACKE DEFAULT_MSG
  41 + LAPACKE_INCLUDE_DIR
  42 + LAPACKE_LIBRARIES)
  43 +mark_as_advanced(LAPACKE_INCLUDE_DIR LAPACKE_LIBRARIES)
FindSTIM.cmake 0 → 100644
  1 +++ a/FindSTIM.cmake
  1 +# finds the STIM library (downloads it if it isn't present)
  2 +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository)
  3 +
  4 +include(FindPackageHandleStandardArgs)
  5 +
  6 +set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})
  7 +
  8 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
  9 +
  10 +if(STIM_FOUND)
  11 + set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
  12 +elseif(STIM_FOUND)
  13 + #if the STIM library isn't found, download it
  14 + #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists
  15 + #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git")
  16 + #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
  17 + #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH)
  18 + message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
  19 + message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
  20 +endif(STIM_FOUND)
  21 +
  22 +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)
src/basic_functions.h 0 → 100644
  1 +++ a/src/basic_functions.h
  1 +#include <stdio.h>
  2 +
  3 +
  4 +size_t* sortIndx(float* input, size_t size){
  5 + //sort indices of score in ascending order (fitness value)
  6 + size_t *idx;
  7 + idx = (size_t*) malloc (size * sizeof (size_t));
  8 + for (size_t i = 0; i < size; i++)
  9 + idx[i] = i;
  10 +
  11 + for (size_t i=0; i<size; i++){
  12 + for (size_t j=i+1; j<size; j++){
  13 + if (input[idx[i]] < input[idx[j]]){
  14 + std::swap (idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  15 + }
  16 + }
  17 + }
  18 + return idx; //use as sortSIdx in selection
  19 +}
  20 +
  21 +
  22 +template<typename T>
  23 +void mtxMul(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
  24 + //compute output matrix M3 of size row1 X column2 and data is column major
  25 + for(size_t i = 0 ; i <r1; i++){
  26 + for(size_t j = 0; j< c2; j++){
  27 + T temp = 0;
  28 + for(size_t k = 0; k < c1 ; k++){ //column1 = row2 for matrix multiplication
  29 + temp+= M1[i * c1 + k] * M2[k * c2 + j]; //compute an element of output matrix
  30 + }
  31 + M3[i * c1 + j] = temp; //copy an element to output matrix
  32 + }
  33 + }
  34 +}
  35 +
  36 +template<typename T>
  37 +void mtxMultranspose(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
  38 + //compute output matrix M3 of size row1 X column2 and data is column major
  39 + for(size_t i = 0 ; i <r1; i++){
  40 + for(size_t j = 0; j< r2; j++){
  41 + T temp = 0;
  42 + for(size_t k = 0; k < c1 ; k++){ //column1 = row2 for matrix multiplication
  43 + temp+= M1[i * c1 + k] * M2[j * c2 + k]; //compute an element of output matrix
  44 + }
  45 + M3[i * r1 + j] = temp; //copy an element to output matrix
  46 + }
  47 + }
  48 +}
  49 +
  50 + //display within class scatter
  51 +template<typename T>
  52 +void displayS(T* sw, size_t f){
  53 +
  54 + for(size_t g = 0; g<1; g++){
  55 + std::cout<<std::endl;
  56 + for(size_t j = 0; j < f; j++){ //total number of features in a gnome
  57 + for(size_t k = 0; k < f; k++){ //total number of features in a gnome
  58 + std::cout<<sw[g*f*f + j*f + k]<<" ";
  59 + }
  60 + std::cout<<std::endl;
  61 + }
  62 + }
  63 + std::cout<<std::endl;
  64 +}
  65 +
  66 +//sort eigenvalues from lapacke results
  67 +size_t* sortEigenVectorIndx(float* eigenvalue, size_t N){
  68 + //sort indices of score in ascending order (fitness value)
  69 + size_t *idx = (size_t*) malloc (N * sizeof (size_t));
  70 + for (size_t i = 0; i < N; i++)
  71 + idx[i] = i;
  72 +
  73 + for (size_t i=0; i<N; i++){
  74 + for (size_t j=i+1; j<N; j++){
  75 + if (eigenvalue[idx[i]] > eigenvalue[idx[j]]){
  76 + std::swap (idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  77 + }
  78 + }
  79 + }
  80 +
  81 + std::cout<<"best eigenvalue index: "<<eigenvalue[idx[0]]<<std::endl;
  82 +
  83 + return idx; //use as sortSIdx in selection
  84 +
  85 +}
src/enviload.h 0 → 100644
  1 +++ a/src/enviload.h
  1 +#include <iostream>
  2 +#include <fstream>
  3 +#include <thread>
  4 +#include <random>
  5 +#include <vector>
  6 +//#include <algorithm>
  7 +
  8 +#define NOMINMAX
  9 +
  10 +//stim libraries
  11 +#include <stim/envi/envi.h>
  12 +#include <stim/image/image.h>
  13 +#include <stim/parser/arguments.h>
  14 +#include <stim/ui/progressbar.h>
  15 +#include <stim/parser/filename.h>
  16 +//#include <stim/visualization/colormap.h>
  17 +#include <stim/parser/table.h>
  18 +
  19 +std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
  20 +//loads spectral features into a feature matrix based on a set of class images (or masks)
  21 +float* load_features(size_t nC, size_t tP, size_t B, stim::envi E, std::vector< unsigned int > nP){
  22 + float progress = 0; //initialize the progress bar variable
  23 + unsigned long long bytes_fmat = sizeof(float) * tP * B; //calculate the number of bytes in the feature matrix
  24 + std::cout<<"totalnumber of samples "<<tP<<std::endl;
  25 + std::cout<<"Allocating space for the feature matrix: "<<tP<<" x "<<B<<" = "<<(float)bytes_fmat/(float)1048576<<"MB"<<std::endl;
  26 + float* F = (float*) malloc(bytes_fmat); //allocate space for the sifted matrix
  27 + std::cout<<"Loading Training Data ("<<nC<<" classes)"<<std::endl;
  28 + //load all of the training spectra into an array
  29 + unsigned long long F_idx = 0; //initialize the matrix index to 0
  30 + //unsigned long long R_idx = 0;
  31 + for(unsigned c = 0; c < nC; c++){ //for each class image
  32 + std::cout<<"\tSifting class "<<c+1<<" = "<<nP[c]<<" pixels..."<<std::endl;
  33 + // std::thread t1 = std::thread(progress_thread_envi, &E); //start the progress bar thread
  34 + E.sift((void*)&F[F_idx], C[c].data(), true); //sift that class into the matrix at the proper location
  35 + F_idx += nP[c] * B;
  36 + progress = (float)(c+1) / (float)nC * 100;
  37 + // t1.join();
  38 + }
  39 +
  40 + return F;
  41 +}
  42 +
  43 +/// Load responses for a Random Forest Classifier
  44 +unsigned int* ga_load_responses(size_t tP, size_t nC, std::vector< unsigned int > nP){
  45 + unsigned int* T = (unsigned int*)malloc(tP*sizeof(unsigned int)); //generate an OpenCV vector of responses
  46 + size_t R_idx = 0; //index into the response array
  47 + for(size_t c = 0; c < nC; c++){ //for each class image
  48 + for(unsigned long long l = 0; l < nP[c]; l++){ //assign a response for all pixels of class c loaded in the training matrix
  49 + T[R_idx + l] = (unsigned int)c+1;
  50 + }
  51 + R_idx += nP[c]; //increment the response vector index
  52 + }
  53 + return T;
  54 +}
  55 +
  56 +
  57 +//loads the necessary data for training a random forest classifier
  58 +std::vector< unsigned int > ga_load_class_images(int argc, stim::arglist args, size_t* nC, size_t* tP){
  59 + if(args["classes"].nargs() < 2){ //if fewer than two classes are specified, there's a problem
  60 + std::cout<<"ERROR: training requires at least two class masks"<<std::endl;
  61 + exit(1);
  62 + }
  63 + std::vector< unsigned int > nP;
  64 + size_t num_images = args["classes"].nargs(); //count the number of class images
  65 + //size_t num_images = args["rf"].nargs(); //count the number of class images
  66 + //std::vector<std::string> filenames(num_images); //initialize an array of file names to store the names of the images
  67 + std::string filename; //allocate space to store the filename for an image
  68 + for(size_t c = 0; c < num_images; c++){ //for each image
  69 + filename = args["classes"].as_string(c);; //get the class image file name
  70 + stim::image<unsigned char> image(filename); //load the image
  71 + //push_training_image(image.channel(0), nC, tP, nP); //push channel zero (all class images are assumed to be single channel)
  72 + C.push_back(image.channel(0));
  73 + unsigned int npixels = (unsigned int)image.channel(0).nnz();
  74 + nP.push_back(npixels); //push the number of pixels onto the pixel array
  75 + *tP += npixels; //add to the running total of pixels
  76 + *nC = *nC + 1;
  77 + }
  78 +
  79 + return nP;
  80 +}
  81 +
  82 +void display_PixelfeatureNclass(float* F, unsigned int* T, size_t B, size_t Idx){
  83 + //display code for debug, displaying Idx th pixel from feature matrix F with all features B
  84 + std::cout<<"class of pixel["<<Idx<<"]" <<"is: "<<T[Idx]<<std::endl;
  85 + std::cout<<"feature["<<Idx<<"] is: "<<std::endl;
  86 + for (size_t i = 0; i< B; i++)
  87 + std::cout<<" "<<F[Idx * B + i];
  88 +}
  89 +
  90 +
  91 +void display_args(int argc, stim::arglist args){
  92 + std::cout<<"number of arguments "<<argc<<std::endl;
  93 + std::cout<<"arg 0 "<<args.arg(0)<<std::endl;
  94 + std::cout<<"arg 1 "<<args.arg(1)<<std::endl;
  95 +}
  96 +
  97 +void display_dataSize(size_t X, size_t Y, size_t B){
  98 + std::cout<<"number of samples "<<X*Y<<std::endl;
  99 + std::cout<<"number of bands "<<B<<std::endl;
  100 +
  101 +}
  102 +
  103 +void display_phe(float* phe, unsigned int* P, size_t p,size_t f, size_t i, size_t j){
  104 + //display code for debug, displaying jth pixel from new feature matrix which is created for gnome i
  105 + std::cout<<"phe["<<i<<"]["<<j<<"]"<<std::endl;
  106 + for(unsigned int n = 0; n < f; n++){
  107 + std::cout<<P[i * f + n]; //spectral feature indices from gnome i of current population
  108 + std::cout<<" "<<phe[i* (p * f) +j * f + n]<<std::endl; //display 100th pixel value corresponding to feature indices in the gnome
  109 +
  110 + }
  111 +}
  112 +
  113 +
  114 +void display_gnome(unsigned int* P,size_t f,size_t gIdx){
  115 + //display code for debug, displaying gnome gIdx of current population, gnome is subset of feature indices
  116 + for (size_t i = 0; i< f; i++)
  117 + std::cout<<" "<<P[gIdx * f + i];
  118 +}
  119 +
src/ga_gpu.cu 0 → 100644
  1 +++ a/src/ga_gpu.cu
  1 +#ifndef GA_GPU_CU
  2 +#define GA_GPU_CU
  3 +
  4 +//#include <cuda.h>
  5 +//#include "cuda_runtime.h"
  6 +//#include <cuda_runtime_api.h>
  7 +//#include "device_launch_parameters.h"
  8 +#include <stim/cuda/cudatools/error.h>
  9 +
  10 +#include "timer.h"
  11 +//#include <stdio.h>
  12 +//#include <stdlib.h>
  13 +#include <iostream>
  14 +#include <fstream>
  15 +
  16 +extern Timer timer;
  17 +
  18 +
  19 +__global__ void kernel_computeSb(float* gpuSb, unsigned int* gpuP, float* gpuM, float* gpuCM, size_t ub, size_t f, size_t p, size_t nC, unsigned int* gpu_nPxInCls){
  20 +
  21 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //gnomeindex in population matrix
  22 + size_t j = blockIdx.y * blockDim.y + threadIdx.y; //index of feature index from gnome
  23 + size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //if we use 3d grid then it is needed
  24 +
  25 +
  26 + if(gnomeIndx >= p || i >= f || j >= f) return; //handling segmentation fault
  27 +
  28 + //form a sb matrix from vector sbVec, multiply each element in matrix with num of pixels in the current class
  29 + //and add it to previous value of between class scatter matrix sb
  30 + float tempsbval;
  31 + size_t n1;
  32 + size_t n2;
  33 + size_t classIndx; //class index in class mean matrix
  34 +
  35 + for(size_t c = 0; c < nC; c++){
  36 + tempsbval = 0;
  37 + classIndx = c * ub;
  38 + n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix
  39 + n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix
  40 + tempsbval = ((gpuCM[classIndx + n1] - gpuM[n1]) *(gpuCM[classIndx + n2] - gpuM[n2])) * (float)gpu_nPxInCls[c] ;
  41 + gpuSb[gnomeIndx * f * f + j * f + i] += tempsbval;
  42 + }
  43 +}
  44 +
  45 +
  46 +//Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
  47 +__global__ void kernel_computeSw(float* gpuSw, unsigned int* gpuP, float* gpuCM, float* gpuF, unsigned int* gpuT, size_t ub, size_t f, size_t p, size_t nC, size_t tP){
  48 + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //gnomeindex in population matrix
  49 + size_t j = blockIdx.y * blockDim.y + threadIdx.y; //index of feature index from gnome
  50 + size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //total number of individuals
  51 +
  52 + if(gnomeIndx >= p || i >= f || j >= f) return; //handling segmentation fault
  53 + float tempswval;
  54 +
  55 + size_t n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix
  56 + size_t n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix
  57 + tempswval = 0;
  58 + for(size_t c = 0; c < nC; c++){
  59 + tempswval = 0;
  60 + for(size_t k = 0; k < tP; k++){
  61 + if(gpuT[k] == (c+1) ){
  62 + tempswval += ((gpuF[ k * ub + n1] - gpuCM[c * ub + n1]) * (gpuF[k * ub + n2] - gpuCM[c * ub + n2]));
  63 + }
  64 + }
  65 + gpuSw[gnomeIndx * f * f + j * f + i] += tempswval;
  66 + }
  67 +}
  68 +
  69 +
  70 +
  71 +
  72 + //=============================gpu intialization=============================================
  73 + /// Initialize all GPU pointers used in the GA-GPU algorithm
  74 + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
  75 + /// @param p is the population size
  76 + /// @param f is the number of desired features
  77 + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
  78 + /// @param cpuM is a pointer to the class mean on the CPU
  79 + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
  80 + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class
  81 + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
  82 + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
  83 + /// @param gpuF is the destination for the GPU feature matrix
  84 + /// @param cpuF is the complete feature matrix on the CPU
  85 +
  86 + void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
  87 + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
  88 + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
  89 + float** gpuSb, float** gpuSw,
  90 + float** gpuF, float* cpuF,
  91 + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls){
  92 +
  93 + HANDLE_ERROR(cudaMalloc(gpuP, p * f * sizeof(unsigned int))); //allocate space for the population on the GPU
  94 +
  95 + HANDLE_ERROR(cudaMalloc(gpuCM, nC * ub * sizeof(float))); //allocate space for the class mean and copy it to the GPU
  96 + HANDLE_ERROR(cudaMemcpy(*gpuCM, cpuCM, nC * ub * sizeof(float), cudaMemcpyHostToDevice));
  97 +
  98 +
  99 + HANDLE_ERROR(cudaMalloc(gpuM, ub * sizeof(float))); //allocate space for the mean of the feature matrix
  100 + HANDLE_ERROR(cudaMemcpy(*gpuM, cpuM, ub * sizeof(float), cudaMemcpyHostToDevice));
  101 +
  102 + HANDLE_ERROR(cudaMalloc(gpu_nPxInCls, nC * sizeof(unsigned int))); //number of pixels in each class
  103 + HANDLE_ERROR(cudaMemcpy(*gpu_nPxInCls, cpu_nPxInCls, nC * sizeof(unsigned int), cudaMemcpyHostToDevice));
  104 +
  105 +
  106 + HANDLE_ERROR(cudaMalloc(gpuSb, p * f * f * sizeof(float))); //allocate memory for sb which is calculated for eery class separately and added together in different kernel
  107 + HANDLE_ERROR(cudaMalloc(gpuSw, p * f * f * sizeof(float)));
  108 +
  109 + HANDLE_ERROR(cudaMalloc(gpuF, tP * ub * sizeof(float)));
  110 + HANDLE_ERROR(cudaMemcpy(*gpuF, cpuF, tP * ub * sizeof(float), cudaMemcpyHostToDevice));
  111 +
  112 + HANDLE_ERROR(cudaMalloc(gpuT, tP * sizeof(unsigned int)));
  113 + HANDLE_ERROR(cudaMemcpy(*gpuT, cpuT, tP* sizeof(unsigned int), cudaMemcpyHostToDevice));
  114 +
  115 + }
  116 +
  117 + //computation on GPU
  118 + /// Initialize all GPU pointers used in the GA-GPU algorithm
  119 + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
  120 + /// @param p is the population size
  121 + /// @param f is the number of desired features
  122 + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
  123 + /// @param cpuSb is the between-class scatter matrix on the GPU (this function will copy the GPU result there)
  124 + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
  125 + /// @param cpuSw is the within-class scatter matrix on the GPU (this function will copy the GPU result there)
  126 +
  127 + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
  128 + /// @param cpuM is a pointer to the class mean on the CPU
  129 + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
  130 + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class
  131 +
  132 + /// @param gpuF is the destination for the GPU feature matrix
  133 + /// @param cpuF is the complete feature matrix on the CPU
  134 + void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
  135 + float* gpuSb, float* cpuSb,
  136 + float* gpuSw, float* cpuSw,
  137 + float* gpuF, unsigned int* gpuT,float* gpuM, float* gpuCM,
  138 + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile){
  139 +
  140 + timer.start();
  141 + HANDLE_ERROR(cudaMemcpy(gpuP, cpuP, p * f * sizeof(unsigned int), cudaMemcpyHostToDevice));
  142 + HANDLE_ERROR(cudaMemset(gpuSb, 0, p * f * f * sizeof(float)));
  143 +
  144 + //grid configuration of GPU
  145 + size_t threads = (size_t)sqrt(props.maxThreadsPerBlock);
  146 + if(threads > f) threads = f;
  147 + size_t numberofblocksfor_f = (size_t)ceil((float)f/ threads);
  148 + dim3 blockdim((int)threads, (int)threads, 1);
  149 + dim3 griddim((int)numberofblocksfor_f, (int)numberofblocksfor_f, (int)p); //X dimension blocks will cover all gnomes of the population and each block will have as many gnomes as it can feet
  150 + //sharedbytes calculation
  151 + size_t sharedBytes = p * f * sizeof(unsigned int); //copy population to shared memory
  152 + if(props.sharedMemPerBlock < sharedBytes) sharedBytes = props.sharedMemPerBlock;
  153 +
  154 + //launch kernel to compute sb matrix
  155 + kernel_computeSb<<<griddim, blockdim, sharedBytes>>>(gpuSb, gpuP, gpuM, gpuCM, ub, f, p, nC, gpu_nPxInCls);
  156 + cudaDeviceSynchronize();
  157 +
  158 + HANDLE_ERROR(cudaMemcpy(cpuSb, gpuSb, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); //copy between class scatter from gpu to cpu
  159 + const auto elapsedg1 = timer.time_elapsed();
  160 + if(gen > gnrtn -2){
  161 + std::cout << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
  162 + profilefile << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
  163 + }
  164 +
  165 + timer.start();
  166 + //Compute within class scatter
  167 + HANDLE_ERROR(cudaMemset(gpuSw, 0, p * f * f * sizeof(float)));
  168 +
  169 + //launch kernel to compute sb matrix
  170 + kernel_computeSw<<<griddim, blockdim>>>(gpuSw, gpuP, gpuCM, gpuF, gpuT, ub, f, p, nC, tP);
  171 + cudaDeviceSynchronize();
  172 + //copy between class scatter from gpu to cpu
  173 + HANDLE_ERROR(cudaMemcpy(cpuSw, gpuSw, p * f * f * sizeof(float), cudaMemcpyDeviceToHost));
  174 + const auto elapsedg2 = timer.time_elapsed();
  175 + if(gen > gnrtn - 2){
  176 + std::cout << "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
  177 + profilefile<< "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
  178 + }
  179 +
  180 + }
  181 +
  182 + //free all gpu pointers
  183 + void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT){
  184 +
  185 + HANDLE_ERROR(cudaFree(gpuP));
  186 + HANDLE_ERROR(cudaFree(gpuCM));
  187 + HANDLE_ERROR(cudaFree(gpuM));
  188 + HANDLE_ERROR(cudaFree(gpu_nPxInCls));
  189 + HANDLE_ERROR(cudaFree(gpuSb));
  190 + HANDLE_ERROR(cudaFree(gpuSw));
  191 + HANDLE_ERROR(cudaFree(gpuF));
  192 + HANDLE_ERROR(cudaFree(gpuT));
  193 + }
  194 +
  195 +#endif
  196 +
src/ga_gpu.h 0 → 100644
  1 +++ a/src/ga_gpu.h
  1 +#ifndef GA_GPU_H
  2 +#define GA_GPU_H
  3 +
  4 +#include <iostream>
  5 +#include <thread>
  6 +#include <complex>
  7 +#include <cv.h>
  8 +#include <stdio.h>
  9 +#include <stdlib.h>
  10 +#include <iostream>
  11 +
  12 +#include "timer.h"
  13 +
  14 +#include "basic_functions.h"
  15 +//LAPACKE support for Visual Studio
  16 +
  17 +#ifndef LAPACK_COMPLEX_CUSTOM
  18 +#define LAPACK_COMPLEX_CUSTOM
  19 +#define lapack_complex_float std::complex<float>
  20 +#define lapack_complex_double std::complex<double>
  21 +#include "lapacke.h"
  22 +#endif
  23 +
  24 +
  25 +#define LAPACK_ROW_MAJOR 101
  26 +#define LAPACK_COL_MAJOR 102
  27 +
  28 +//CUDA functions
  29 +void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
  30 + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
  31 + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
  32 + float** gpuSb, float** gpuSw,
  33 + float** gpuF, float* cpuF,
  34 + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls);
  35 +void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
  36 + float* gpuSb, float* cpuSb,
  37 + float* gpuSw, float* cpuSw,
  38 + float* gpuF, unsigned int* T, float* gpuM, float* gpuCM,
  39 + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile);
  40 +void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT);
  41 +
  42 +struct _fcomplex { float re, im; };
  43 +typedef struct _fcomplex fcomplex;
  44 +
  45 +Timer timer;
  46 +
  47 +class ga_gpu {
  48 +
  49 +public:
  50 + float* F; //pointer to the raw data in host memory
  51 + unsigned int* T; //pointer to the class labels in host memory
  52 + size_t gnrtn; //total number of generations
  53 + size_t p; //population size
  54 + size_t f; // number of features to be selected
  55 +
  56 + unsigned int* P; //pointer to population of current generation genotype matrix (p x f)
  57 + float* S; //pointer to score(fitness value) of each gnome from current population matric P
  58 + unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population
  59 + unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F)
  60 + unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0)
  61 + float uniformRate;
  62 + float mutationRate;
  63 + size_t tournamentSize; //number of potential gnomes to select parent for crossover
  64 + bool elitism; //if true then passes best gnome to next generation
  65 +
  66 + //declare gpu pointers
  67 + float* gpuF; //Feature matrix
  68 + unsigned int* gpuT; //target responses of entire feature matrix
  69 + unsigned int* gpuP; //population matrix
  70 + unsigned int* gpu_nPxInCls;
  71 + float* gpuCM; //class mean of entire feature matrix
  72 + float* gpuM; //total mean of entire feature matrix
  73 + float* gpuSb; //between class scatter for all individuals of current population
  74 + float* gpuSw; //within class scatter for all individuals of current population
  75 +
  76 + //constructor
  77 + ga_gpu() {}
  78 +
  79 + //==============================generate initial population
  80 +
  81 + void initialize_population(std::vector<unsigned int> i_guess, bool debug) {
  82 + if (debug) {
  83 + std::cout << std::endl;
  84 + std::cout << "initial populatyion is: " << std::endl;
  85 + }
  86 +
  87 + lb = 0;
  88 + P = (unsigned int*)calloc(p * f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f
  89 + S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P
  90 +
  91 + srand(1);
  92 + //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess
  93 + std::memcpy(P, i_guess.data(), f * sizeof(unsigned int));
  94 +
  95 + //generate random initial population
  96 + for (size_t i1 = 1; i1 < p; i1++) {
  97 + for (size_t i2 = 0; i2 < f; i2++) {
  98 + P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B)
  99 + if (debug) std::cout << P[i1 * f + i2] << "\t";
  100 + }
  101 + if (debug) std::cout << std::endl;
  102 + }
  103 + }
  104 +
  105 + //===================generation of new population==========================================
  106 +
  107 + size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) {
  108 +
  109 + //gget index of best gnome in the current population
  110 + size_t bestG_Indx = gIdxbestGnome();
  111 + //-------------(reproduction)-------
  112 + if (elitism) {
  113 + saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation
  114 + }
  115 + // ------------Crossover population---------------
  116 + int elitismOffset;
  117 + if (elitism) {
  118 + elitismOffset = 1;
  119 + }
  120 + else {
  121 + elitismOffset = 0;
  122 + }
  123 +
  124 + //Do crossover for rest of population size
  125 + for (int i = elitismOffset; i <p; i++) {
  126 + // std::cout<<"crossover of gnome "<<i<<std::endl;
  127 + std::vector<unsigned int>gnome1;
  128 + gnome1.reserve(f);
  129 + gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
  130 + // displaygnome(gnome1);
  131 + std::vector<unsigned int>gnome2;
  132 + gnome2.reserve(f);
  133 + gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
  134 + // displaygnome(gnome2);
  135 + std::vector<unsigned int>gnome;
  136 + gnome.reserve(f);
  137 + gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome
  138 + // displaygnome(gnome);
  139 + saveGnome(i, gnome, newPop); //save crosseover result to new population
  140 + }
  141 +
  142 + //--------------Mutate population------------
  143 + // introduce some mutation in new population
  144 + for (int i = elitismOffset; i <p; i++) {
  145 + //std::cout<<"mutation of gnome"<<std::endl;
  146 + std::vector<unsigned int>gnome;
  147 + gnome.reserve(f);
  148 +
  149 + for (size_t n = 0; n < f; n++)
  150 + gnome.push_back(newPop[i*f + n]);
  151 + //std::cout<<"\n starting address "<<(&newPop[0] + i*f)<<"\t end address is "<<(&newPop[0] + i*f + f-1) <<std::endl;
  152 + //std::copy((&newPop[0] + i*f), (&newPop[0] + i*f +f-1), gnome.begin());
  153 + // displaygnome(gnome);
  154 + mutate(gnome);
  155 + // displaygnome(gnome);
  156 + saveGnome(i, gnome, newPop); //save new gnome to new population at position i
  157 + }
  158 + return bestG_Indx;
  159 + }
  160 +
  161 + //============================== functions for population evolution ===========================================================================
  162 + std::vector<unsigned int> tournamentSelection(size_t tSize) {
  163 + // Create a tournament population
  164 + unsigned int* tournamentP = (unsigned int*)malloc(tSize * f * sizeof(unsigned int));
  165 + std::vector<float>tournamentS;
  166 +
  167 + // For each place in the tournament get a random individual
  168 + for (size_t i = 0; i < tSize; i++) {
  169 + size_t rndmIdx = rand() % p + lb;
  170 + tournamentS.push_back(S[rndmIdx]);
  171 + //for (size_t n = 0; n <f; n++)
  172 + //tournamentP[i * f + n] = (getGnome(rndmIdx)).at(n);
  173 + std::vector<unsigned int> temp_g(getGnome(rndmIdx));
  174 + std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f);
  175 + }
  176 + // Get the fittest
  177 + std::vector<unsigned int>fittestgnome;
  178 + fittestgnome.reserve(f);
  179 +
  180 + //select index of best gnome from fitness score
  181 + size_t bestSIdx = 0;
  182 + for (size_t i = 0; i < tSize; i++) {
  183 + if (tournamentS[i] < tournamentS[bestSIdx])
  184 + bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  185 + }
  186 +
  187 + for (size_t n = 0; n < f; n++)
  188 + fittestgnome.push_back(tournamentP[bestSIdx * f + n]);
  189 + return fittestgnome;
  190 + } //end of tournament selection
  191 +
  192 +
  193 + std::vector<unsigned int> crossover(std::vector<unsigned int> gnome1, std::vector<unsigned int> gnome2, float* M) {
  194 + std::vector<unsigned int> gnome;
  195 + for (size_t i = 0; i < f; i++) {
  196 + // Crossover
  197 + float r = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
  198 + if (r <= uniformRate) {
  199 + gnome.push_back(gnome1.at(i));
  200 + }
  201 + else {
  202 + gnome.push_back(gnome2.at(i));
  203 + }
  204 + }
  205 +
  206 + //check new gnome for all zero bands and duplicated values
  207 + std::vector<unsigned int> gnomeunique;
  208 + int flag = 0;
  209 + std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
  210 + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique));
  211 + /* if(gnomeunique.size()< gnome.size()){
  212 + flag = 1;
  213 + std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
  214 + }*/
  215 + unsigned int featureband, featureband1, featureband2;
  216 + if (gnomeunique.size() < f) {
  217 + for (size_t k = gnomeunique.size(); k < f; k++) {
  218 + featureband = rand() % ub + lb;
  219 + for (size_t i = 0; i < f; i++) {
  220 + featureband1 = gnome1.at(i);
  221 + featureband2 = gnome2.at(i);
  222 + for (size_t j = 0; j < gnomeunique.size(); j++) {
  223 + if (gnomeunique.at(j) != featureband1) {
  224 + featureband = featureband1;
  225 + }
  226 + else if (gnomeunique.at(j) != featureband2) {
  227 + featureband = featureband2;
  228 + }
  229 + else if (gnomeunique.at(j) == featureband) {
  230 + featureband = rand() % ub + lb;
  231 + while (M[featureband] == 0) {
  232 + featureband = rand() % ub + lb;
  233 + }
  234 + }
  235 + }
  236 + }
  237 + gnomeunique.push_back(featureband);
  238 + }
  239 + }
  240 + //if(flag ==1){
  241 + // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
  242 + // for(int k = 0; k < gnome.size(); k++)
  243 + // std::cout<<gnome[k]<<"\t";
  244 + // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
  245 + // for(int k = 0; k < gnomeunique.size(); k++)
  246 + // std::cout<<gnomeunique[k]<<"\t";
  247 + //}
  248 +
  249 + return gnomeunique;
  250 + }
  251 +
  252 + void mutate(std::vector<unsigned int> gnome) {
  253 + for (size_t i = 0; i < f; i++) {
  254 + float LO = (float)0.01;
  255 + float HI = 1;
  256 + float r3 = LO + static_cast <float> (rand()) / (static_cast <float> (RAND_MAX / (HI - LO)));
  257 + //if random value is less than mutationRate then mutate this gnome
  258 + if (r3 <= mutationRate) {
  259 + gnome.at(i) = (rand() % ub + lb);
  260 + gnome.push_back(rand() % ub + lb);
  261 + }
  262 + }
  263 + }
  264 +
  265 + ///returns gnome of given index
  266 + std::vector<unsigned int> getGnome(size_t idx) {
  267 + std::vector<unsigned int> gnome;
  268 + gnome.reserve(f);
  269 + //pulling gnome idx from population P
  270 + for (size_t n = 0; n < f; n++)
  271 + gnome.push_back(P[idx * f + n]);
  272 + //memcpy(&gnome[0], P+idx*f, f*sizeof(size_t));
  273 + return gnome;
  274 + }
  275 +
  276 + //save gnome of index gIdx from previous population at position i in the new population
  277 + void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) {
  278 + for (size_t n = 0; n < f; n++)
  279 + newPop[i * f + n] = P[gIdx * f + n];
  280 + }
  281 +
  282 + void saveGnome(size_t idx, std::vector<unsigned int>gnome, unsigned int* newPop) {
  283 + std::copy(gnome.begin(), gnome.end(), newPop + idx*f);
  284 + }
  285 +
  286 + size_t gIdxbestGnome() {
  287 + //std::cout<<"best gnome indes is: "<<sortSIndx()[0];
  288 + return sortSIndx()[0];
  289 + }
  290 +
  291 + void displaygnome(std::vector<unsigned int> gnome) {
  292 + std::cout << "\t gnome: ";
  293 + for (int i = 0; i<gnome.size(); ++i)
  294 + std::cout << gnome[i] << ' ';
  295 + std::cout << std::endl;
  296 + }
  297 +
  298 + //---------------------post processing of score-------------------------------------
  299 + void Snorm() { //normalize gnome scores
  300 + double s;
  301 + for (size_t i = 0; i < p; i++) {
  302 + s += S[i]; //sum of all gnome score in population
  303 + }
  304 + //std::cout<<"mean Score is: "<<(double) s/p;
  305 + for (size_t i = 0; i <p; i++)
  306 + S[i] = S[i] / s;
  307 + }
  308 +
  309 + size_t* sortSIndx() { //sort gnome index according to gnome scores
  310 + //sort indices of score in ascending order (fitness value)
  311 + size_t *idx = (size_t*)malloc(p * sizeof(size_t)); //array to hold sorted gnome index
  312 + for (size_t i = 0; i < p; i++) { //initialize index array from 1 to p(population size) in an ascending order
  313 + idx[i] = i;
  314 + }
  315 +
  316 + for (size_t i = 0; i<p; i++) { //sort gnome indices according to score values using bubble sort
  317 + for (size_t j = i + 1; j<p; j++) {
  318 + if (S[idx[i]] > S[idx[j]]) {
  319 + std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  320 + }
  321 + }
  322 + }
  323 +
  324 + //display best gnome
  325 + //std::cout << "best fitness value: " << S[idx[0]] << std::endl;
  326 + /*if (S[idx[0]] < 0) {
  327 + std::cout << "best gnome is " << std::endl;
  328 + for (size_t i = 0; i < f; i++)
  329 + std::cout << P[f * idx[0] + i] << ", ";
  330 + std::cout << std::endl;
  331 + }*/
  332 +
  333 + return idx; //use as sortSIdx in selection
  334 + }
  335 +
  336 +
  337 + //size_t* sortIndx(float* input, size_t size) {
  338 + // //sort indices of score in ascending order (fitness value)
  339 + // size_t *idx;
  340 + // idx = (size_t*)malloc(size * sizeof(size_t));
  341 + // for (size_t i = 0; i < size; i++)
  342 + // idx[i] = i;
  343 +
  344 + // for (size_t i = 0; i<size; i++) {
  345 + // for (size_t j = i + 1; j<size; j++) {
  346 + // if (input[idx[i]] < input[idx[j]]) {
  347 + // std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  348 + // }
  349 + // }
  350 + // }
  351 + // return idx; //use as sortSIdx in selection
  352 +
  353 + //}
  354 +
  355 + void generateNewP(unsigned int* newPop) {
  356 + //std::memcpy(P, 0 , p * f *sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
  357 + std::memcpy(P, newPop, p * f * sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
  358 + }
  359 +
  360 + //============================== functions for fitness function ===========================================================================
  361 + //compute total mean M (1 X B) of all features (tP X B)
  362 + void ttlMean(float* M, size_t tP, size_t B) {
  363 + //std::cout<<"total number of pixels are "<<tP<<std::endl;
  364 + for (int k = 0; k < tP; k++) { //total number of pixel in feature matrix
  365 + for (size_t n = 0; n < B; n++) { // index of feature in ith gnome
  366 + M[n] += F[k * B + n];
  367 + }
  368 + }
  369 + for (size_t n = 0; n < B; n++) //take an avarage of above summation
  370 + M[n] = M[n] / (float)tP;
  371 + }
  372 +
  373 + void dispalymean(float* M) { //display mean
  374 + std::cout << std::endl;
  375 + std::cout << "Total mean of gnome 1 features are is " << std::endl;
  376 +
  377 + for (size_t i = 0; i < 1; i++) {
  378 + for (size_t j = 0; j < f; j++) {
  379 + size_t index = P[i*f + j];
  380 + std::cout << "feature index " << index << "\t total mean" << M[index] << std::endl;
  381 + }
  382 + }
  383 + std::cout << std::endl;
  384 + }
  385 +
  386 + //Compute class means cM (p x nC x f) of all gnome features phe(tP x f)
  387 + void classMean(float* cM, size_t tP, size_t nC, size_t B, std::vector<unsigned int> nPxInCls) {
  388 + for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses
  389 + float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c'
  390 + for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix
  391 + if (T[k] == c + 1) { //class numbers start from 1 not 0
  392 + for (size_t n = 0; n < B; n++) { //total number of features in a gnome
  393 + tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome
  394 + }
  395 + }
  396 + }
  397 + for (size_t n = 0; n < B; n++)
  398 + cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c'
  399 +
  400 + }
  401 +
  402 + }
  403 +
  404 + //display class mean
  405 + void dispalyClassmean(float* cM, size_t nC) {
  406 + std::cout << std::endl;
  407 + std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl;
  408 + for (size_t i = 0; i < 1; i++) {
  409 + for (size_t c = 0; c < nC; c++) {
  410 + for (size_t j = 0; j < f; j++) {
  411 + size_t index = P[i*f + j];
  412 +
  413 + std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl;
  414 + }
  415 + }
  416 + }
  417 + std::cout << std::endl;
  418 + }
  419 +
  420 + //-----------------------------------------between and within class Scattering computation---------------------------------------------------------------
  421 + //computation on CPU
  422 + void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector<unsigned int> nPxInCls) {
  423 + timer.start();
  424 + computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU
  425 + const auto elapsed = timer.time_elapsed();
  426 + std::cout << "Sb CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << "us" << std::endl;
  427 +
  428 + timer.start();
  429 + computeSw(sw, cM, nC, tP); //compute within class scatter on CPU
  430 + const auto elapsed1 = timer.time_elapsed();
  431 + std::cout << "Sw CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
  432 + }
  433 +
  434 + //display between class scatter
  435 + void displaySb(float* sb) {
  436 + std::cout << "between scatter is " << std::endl;
  437 + for (size_t g = 0; g<1; g++) {
  438 + std::cout << std::endl;
  439 + for (size_t j = 0; j < f; j++) { //total number of features in a gnome
  440 + for (size_t k = 0; k < f; k++) { //total number of features in a gnome
  441 + std::cout << sb[g * f * f + j * f + k] << " ";
  442 + }
  443 + std::cout << std::endl;
  444 + }
  445 + }
  446 + std::cout << std::endl;
  447 + }
  448 +
  449 + //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f)
  450 + void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector<unsigned int> nPxInCls) {
  451 + float tempsbval;
  452 + size_t n1;
  453 + size_t n2;
  454 + size_t classIndx; //class index in class mean matrix
  455 + /*std::cout <<"population of computation of cpusb "<< std::endl;
  456 + for (size_t i2 = 0; i2 < f; i2++) {
  457 + std::cout << P[i2] << "\t";
  458 + }*/
  459 +
  460 + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
  461 + for (size_t c = 0; c < nC; c++) {
  462 + for (size_t i = 0; i < f; i++) {
  463 + for (size_t j = 0; j < f; j++) {
  464 + tempsbval = 0;
  465 + classIndx = c * ub;
  466 + n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix
  467 + n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix
  468 + // std::cout << "i: " << i << " j: " <<j<< " n1: " << n1 << " n2:" << n2 << std::endl;
  469 + tempsbval = ((cM[classIndx + n1] - M[n1]) *(cM[classIndx + n2] - M[n2]));
  470 + sb[gnomeIndx * f * f + i * f + j] += tempsbval * (float)nPxInCls[c]; // compute tempsb[j][k] element of class 'c' of gnome 'i'
  471 + }
  472 + }
  473 + }
  474 + }
  475 +
  476 + }
  477 +
  478 + //Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
  479 + void computeSw(float* sw, float* cM, size_t nC, size_t tP) {
  480 + float tempswval;
  481 + size_t n1;
  482 + size_t n2;
  483 + size_t cMclass; //class index in class mean matrix
  484 + size_t Pg;
  485 + size_t swg;
  486 + size_t pheg;
  487 + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
  488 + Pg = gnomeIndx * f;
  489 + swg = gnomeIndx * f * f;
  490 + pheg = gnomeIndx * tP * f;;
  491 + for (size_t c = 0; c < nC; c++) {
  492 + cMclass = c * ub;
  493 +
  494 + for (size_t k = 0; k < tP; k++) {
  495 + if (T[k] == (c + 1)) {
  496 + for (size_t i = 0; i < f; i++) {
  497 + for (size_t j = 0; j < f; j++) {
  498 + n1 = P[Pg + i]; //actual feature index in original feature matrix
  499 + n2 = P[Pg + j]; //actual feature index in original feature matrix
  500 +
  501 + tempswval = 0;
  502 + tempswval = ((F[k * ub + n1] - cM[cMclass + n1]) * (F[k * ub + n2] - cM[cMclass + n2]));
  503 + //tempswval = ((phe[gnomeIndx * tP * f + k * f + i] - cM[c * ub + P[gnomeIndx * f + i]]) * (phe[gnomeIndx * tP *f + k * f + j] - cM[c * ub + P[gnomeIndx * f + j]]));
  504 + sw[gnomeIndx * f * f + i * f + j] += tempswval;
  505 + }
  506 + }
  507 + }
  508 + }
  509 + }
  510 +
  511 + }
  512 + }
  513 + //checking bands with all zeros and replacing duplicated bands in gnome but this function is only for initial population
  514 + //void zerobandcheck(float* M, bool initial) {
  515 + // for (size_t g = 0; g < p; g++) { // for each gnome
  516 + // for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
  517 + // while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
  518 + // P[g * f + i] = rand() % ub + lb;
  519 + // }
  520 + // }
  521 + // //checking for duplicats in a gnome
  522 + // std::vector<unsigned int> gnome = getGnome(g);
  523 + // std::vector<unsigned int> gnomeunique;
  524 + // int flag = 0; //flag will be set if gnome has duplicated band (feature) index
  525 + // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
  526 + // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies
  527 + // if (gnomeunique.size()< gnome.size()) {
  528 + // flag = 1; //set flag for those if there are duplicated indices
  529 + // //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
  530 + // }
  531 +
  532 + // //adding extra random feature indices to unique copy of gnome to achive gnome size = f
  533 + // if (gnomeunique.size() < f) {
  534 + // for (size_t k = gnomeunique.size(); k < f; k++) {
  535 + // unsigned int rnumber = rand() % ub + lb;
  536 + // //check if this randomaly generated number is already present in that gnome or not
  537 + // for (size_t j = 0; j < gnomeunique.size(); j++) {
  538 + // if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
  539 + // rnumber = rand() % ub + lb;
  540 + // j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
  541 + // }
  542 + // }
  543 + // gnomeunique.push_back(rnumber); //add feature index to gnomeunique
  544 + // }
  545 + // }
  546 + // std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f);
  547 + // }
  548 + //}
  549 +
  550 + //checking bands with all zeros and replacing duplicated bands in gnome
  551 + void zerobandcheck(float* M, bool initialPop) {
  552 + size_t startgnome;
  553 + if (initialPop) {
  554 + startgnome = 0; //for initial population check all gnomes
  555 + }
  556 + else {
  557 + startgnome = 1; //for next generations start gnome check after elite children offset
  558 + }
  559 + for (size_t g = startgnome; g < p; g++) { // for each gnome except
  560 +
  561 + for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
  562 + while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
  563 + P[g * f + i] = rand() % ub + lb;
  564 + }
  565 + }
  566 + //checking for duplicats in a gnome
  567 + std::vector<unsigned int> gnome = getGnome(g); //get current gnome g from population matrix P
  568 + std::vector<unsigned int> gnomeunique; //array to store only unique band indicies in a genome
  569 + int flag = 0; //flag will be set if gnome has duplicated band (feature) index
  570 + std::sort(gnome.begin(), gnome.end()); //sort current gnome
  571 + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome
  572 + if (gnomeunique.size()< gnome.size()) {
  573 + flag = 1; //set flag for those if there are duplicated indices
  574 + //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
  575 + }
  576 +
  577 + //adding extra random feature indices to unique copy of gnome to achive gnome size = f
  578 + if (gnomeunique.size() < f) {
  579 + for (size_t k = gnomeunique.size(); k < f; k++) {
  580 + unsigned int rnumber = rand() % ub + lb;
  581 + //check if this randomaly generated number is already present in that gnome or not
  582 + for (size_t j = 0; j < gnomeunique.size(); j++) {
  583 + if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
  584 + rnumber = rand() % ub + lb; //generate random number between upper bound and lower bound (ub. lb)
  585 + j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
  586 + }
  587 + }
  588 + gnomeunique.push_back(rnumber); //add feature index to gnomeunique
  589 + }
  590 + }
  591 +
  592 + //diplay loop only if gnome has duplicated indices
  593 + //if(flag ==1){
  594 + // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
  595 + // for(int k = 0; k < gnome.size(); k++)
  596 + // std::cout<<gnome[k]<<"\t";
  597 + // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
  598 + // for(int k = 0; k < gnomeunique.size(); k++)
  599 + // std::cout<<gnomeunique[k]<<"\t";
  600 + //}
  601 + std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f); //copy new gnome without any duplicate band index at current gnome location
  602 + }
  603 + }
  604 +
  605 +
  606 +
  607 + //gpu calling functions
  608 + //gpu initialization (allocating space for all array on GPU)
  609 + void gpuInitializationfrommain(float* cpuM, float* cpuCM, std::vector<unsigned int>cpu_nPxInCls, size_t tP, size_t nC) {
  610 + // call gpuInitialization(......) with all of the necessary parameters
  611 + gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]);
  612 +
  613 + }
  614 +
  615 + //Computation of between class scatter and within class scatter in GPU
  616 + void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) {
  617 + //calling function for SW and Sb computation and passing necessary arrays for computation
  618 + // std::cout<<"gpu function calling"<<std::endl;
  619 + gpucomputeSbSw(gpuP, P, p, f, gpuSb, cpuSb, gpuSw, cpuSw, gpuF, gpuT, gpuM, gpuCM, nC, tP, props, gen, gnrtn, ub, gpu_nPxInCls, profilefile);
  620 +
  621 + //display computed Sb and Sw if debug is set
  622 + if (debug) {
  623 + std::cout << "From GA-GPU class: gpu results of Sb sn Sw" << std::endl;
  624 + displayS(cpuSb, f); //display Sb
  625 + displayS(cpuSw, f); //display Sw
  626 + std::cout << std::endl;
  627 + }
  628 + }
  629 +
  630 + //call function to free gpu pointers
  631 + //free all gpu pointers
  632 + void gpu_Destroy() {
  633 + gpuDestroy(gpuP, gpuCM, gpuM, gpu_nPxInCls, gpuSb, gpuSw, gpuF, gpuT);
  634 + }
  635 +
  636 + //Write a destructor here
  637 + ~ga_gpu() {
  638 +
  639 + if (F != NULL) std::free(F); //not sure about this as it is only for 2nd constructor
  640 + if (T != NULL) std::free(T); //same as above
  641 + if (P != NULL) std::free(P); //not sure about this as it is only for 2nd constructor
  642 + if (S != NULL) std::free(S); //same as above
  643 + //if(i_guess!=NULL) std::free(i_guess); //same as above
  644 + //HANDLE_ERROR(cudaDeviceReset());
  645 +
  646 + }
  647 + };
  648 +
  649 +#endif
src/main.cpp 0 → 100644
  1 +++ a/src/main.cpp
  1 +#include <iostream>
  2 +
  3 +//stim libraries
  4 +#include <stim/envi/envi.h>
  5 +#include <stim/image/image.h>
  6 +#include <stim/ui/progressbar.h>
  7 +#include <stim/parser/filename.h>
  8 +#include <stim/parser/table.h>
  9 +#include <stim/parser/arguments.h>
  10 +//input arguments
  11 +stim::arglist args;
  12 +#include <fstream>
  13 +#include <thread>
  14 +#include <random>
  15 +#include <vector>
  16 +#include <math.h>
  17 +#include <limits>
  18 +
  19 +#define NOMINMAX
  20 +
  21 +
  22 +
  23 +//GA
  24 +#include "ga_gpu.h"
  25 +#include "enviload.h"
  26 +
  27 +
  28 +//envi input file and associated parameters
  29 +stim::envi E; //ENVI binary file object
  30 +unsigned int B; //shortcuts storing the spatial and spectral size of the ENVI image
  31 +//mask and class information used for training
  32 +//std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
  33 +std::vector<unsigned int> nP; //array holds the number of pixels in each mask: nP[m] is the number of pixels in mask m
  34 +size_t nC = 0; //number of classes
  35 +size_t tP = 0; //total number of pixels in all masks: tP = nP[0] + nP[1] + ... + nP[nC]
  36 +float* fea;
  37 +
  38 +//ga_gpu class object
  39 +ga_gpu ga;
  40 +bool debug;
  41 +bool binaryClass;
  42 +int binClassOne;
  43 +
  44 +//creating struct to pass to thread functions as it limits number of arguments to 3
  45 +typedef struct {
  46 + float* S;
  47 + float* Sb;
  48 + float* Sw;
  49 + float* lda;
  50 +}gnome;
  51 +gnome gnom;
  52 +
  53 +
  54 +void gpuComputeEignS( size_t g, size_t fea){
  55 + //eigen value computation will return r = (nC-1) eigen vectors so new projected data will have dimension of r rather than f
  56 + // std::thread::id this_id = std::this_thread::get_id();
  57 + // std::cout<<"thread id is "<< this_id<<std::endl;
  58 + size_t f = fea;
  59 + //std::thread::id g = std::this_thread::get_id();
  60 + float* LeftEigVectors_a = (float*) malloc(f * f * sizeof(float));
  61 + float* gSw_a = (float*) malloc(f * f * sizeof(float)); //copy of between class scatter
  62 + std::memcpy(gSw_a, &gnom.Sw[g * f * f], f * f *sizeof(float));
  63 + if(debug){
  64 + std::cout<<"From Eigen function: Sb and Sw "<<std::endl;
  65 + displayS(gSw_a, f); //display Sb
  66 + displayS(&gnom.Sb[g * f * f], f); //display Sw
  67 + std::cout<<std::endl;
  68 + }
  69 +
  70 + std::vector<unsigned int> features = ga.getGnome(g);
  71 + std::vector<unsigned int> featuresunique;
  72 + int flag = 0;
  73 + std::sort(features.begin(), features.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
  74 + std::unique_copy(features.begin(), features.end(), std::back_inserter(featuresunique));
  75 + if(featuresunique.size()< features.size()){
  76 + f = featuresunique.size();
  77 + }
  78 +
  79 + size_t r = nC-1; //LDA projected dimension (limited to number of classes - 1 by rank)
  80 + if(r > f){
  81 + r = f;
  82 + }
  83 +
  84 + int info;
  85 + float* EigenvaluesI_a = (float*)malloc(f * sizeof(float));
  86 + float* Eigenvalues_a = (float*)malloc(f * sizeof(float));
  87 + int *IPIV = (int*) malloc(sizeof(int) * f);
  88 + //computing inverse of matrix Sw
  89 + memset(IPIV, 0, f * sizeof(int));
  90 + LAPACKE_sgetrf(LAPACK_COL_MAJOR, (int)f, (int)f, gSw_a, (int)f, IPIV);
  91 + // DGETRI computes the inverse of a matrix using the LU factorization computed by DGETRF.
  92 + LAPACKE_sgetri(LAPACK_COL_MAJOR, (int)f, gSw_a, (int)f, IPIV);
  93 +
  94 + float* gSbSw_a = (float*)calloc(f * f, sizeof(float));
  95 + //mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f * sizeof(float)], f, f, f,f);
  96 + mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f], f, f, f,f);
  97 + if(debug){
  98 + std::cout<<"From Eigen function: inverse of sw and ratio of sb and sw (Sb/Sw)";
  99 + displayS(gSw_a, f); //display inverse of Sw (1/Sw)
  100 + displayS(gSbSw_a, f); //display ratio of Sb and Sw (Sb/Sw)
  101 + }
  102 +
  103 + //compute left eigenvectors for current gnome from ratio of between class scatter and within class scatter: Sb/Sw
  104 + info = LAPACKE_sgeev(LAPACK_COL_MAJOR, 'V', 'N', (int)f, gSbSw_a, (int)f, Eigenvalues_a, EigenvaluesI_a, LeftEigVectors_a, (int)f, 0, (int)f);
  105 + //sort eignevalue indices in descending order
  106 + size_t* sortedindx = sortIndx(Eigenvalues_a, f);
  107 + //displayS(LeftEigVectors_a, f); //display Eignevectors (Note these are -1 * matlab eigenvectors does not change fitness score results but keep in mind while projecting data on it)
  108 + //sorting left eigenvectors (building forward transformation matrix As)
  109 + for (size_t rowE = 0; rowE < r; rowE++){
  110 + for (size_t colE = 0; colE < f; colE++){
  111 + size_t ind1 = g * r * f + rowE * f + colE;
  112 + //size_t ind1 = rowE * f + colE;
  113 + size_t ind2 = sortedindx[rowE] * f + colE; //eigenvector as row vector
  114 + gnom.lda[ind1] = LeftEigVectors_a[ind2];
  115 + }
  116 + }
  117 +
  118 + if(debug){
  119 + std::cout<<"Eigenvalues are"<<std::endl;
  120 + for(size_t n = 0 ; n < f; n ++){
  121 + std::cout << Eigenvalues_a[n] << ", " ;
  122 + }
  123 + std::cout<< std::endl;
  124 + std::cout<<"From Eigen function: Eignevector"<<std::endl;
  125 +
  126 + std::cout<<"LDA basis is "<<std::endl;
  127 + std::cout << "r is " << r << std::endl;
  128 + for(size_t l = 0 ; l < r; l++){
  129 + for(size_t n = 0 ; n < f; n ++){
  130 + std::cout << gnom.lda[g * l * f + l * f + n] << ", " ;
  131 + }
  132 + std::cout<<std::endl;
  133 + }
  134 +
  135 + }
  136 + //Extract only r eigne vectors as a LDA projection basis
  137 + float* tempgSb = (float*)calloc(r * f, sizeof(float));
  138 + //mtxMul(tempgSb, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sb[g * f * f * sizeof(float)], r, f, f,f);
  139 + //mtxMul(tempgSb, &lda[g * r * f ], gSb, r, f, f,f);
  140 + mtxMul(tempgSb, &gnom.lda[g * r * f], &gnom.Sb[g * f * f], r, f, f,f);
  141 + float* nSb = (float*)calloc(r * r, sizeof(float));
  142 + mtxMultranspose(nSb, tempgSb, &gnom.lda[g * r * f], r, f, r, f);
  143 +
  144 + float* tempgSw = (float*)calloc(r * f, sizeof(float));
  145 + //mtxMul(tempgSw, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sw[g * f * f * sizeof(float)], r, f, f,f);
  146 + mtxMul(tempgSw, &gnom.lda[g * r * f], &gnom.Sw[g * f * f], r, f, f,f);
  147 + float* nSw = (float*)calloc(r * r, sizeof(float));
  148 + mtxMultranspose(nSw, tempgSw, &gnom.lda[g * r * f], r, f, r, f);
  149 + if(debug){
  150 + std::cout<<"From Eigen function: projected Sb sn Sw"<<std::endl;
  151 + displayS(nSb, r); //display Sb
  152 + displayS(nSw, r); //display Sw
  153 + std::cout<<std::endl;
  154 + }
  155 +
  156 + cv::Mat newSw = cv::Mat((int)r, (int)r, CV_32FC1, nSw); //within scatter of gnome g in the population
  157 + cv::Mat newSb = cv::Mat((int)r, (int)r, CV_32FC1, nSb); //within scatter of gnome g in the population
  158 +
  159 + //fisher's ratio from ratio of projected sb and sw
  160 + float fisherRatio = cv::determinant(newSb) /cv::determinant(newSw);
  161 + gnom.S[g] = 1/fisherRatio;
  162 + if (debug) {
  163 + std::cout<<"Score["<<g<<"]: "<< gnom.S[g]<<std::endl;
  164 +
  165 + std::cout << "best gnoem is " << std::endl;
  166 + for (size_t i = 0; i < f; i++)
  167 + std::cout << ga.P[ga.f * g + i] << ", ";
  168 + std::cout << std::endl;
  169 + }
  170 +// if(!isfinite(gnom.S[g])){
  171 +// std::cout<<"-----------------------------------------------"<<std::endl;
  172 +// std::cout<<"Displaying intermediate values of gnome for which score is non finite"<<std::endl;
  173 +// std::cout<<"population "<<std::endl;
  174 +// for(int i = 0; i < ga.f; i++){
  175 +// std::cout<<"\t"<<ga.P[g * ga.f + i];
  176 +// }
  177 +// std::cout<<std::endl;
  178 +// std::cout<<"Sb determinant is "<<cv::determinant(newSb)<<"\t Sw determinant is "<<cv::determinant(newSb)<<std::endl;
  179 +// std::cout<<"fisher ratio is "<<fisherRatio<<std::endl;
  180 +// std::cout<<"Score["<<g<<"]: "<< gnom.S[g]<<std::endl;
  181 +// std::cout<<"------------------------------------------------"<<std::endl;
  182 +//
  183 +// }
  184 +
  185 +
  186 + if(IPIV!= NULL) std::free(IPIV);
  187 + if(gSw_a!= NULL) std::free(gSw_a);
  188 + if(gSbSw_a!= NULL) std::free(gSbSw_a);
  189 + if(Eigenvalues_a!= NULL) std::free(Eigenvalues_a);
  190 + if(EigenvaluesI_a!= NULL) std::free(EigenvaluesI_a);
  191 + if(tempgSb!= NULL) std::free(tempgSb);
  192 + if(tempgSw!= NULL) std::free(tempgSw);
  193 +
  194 +}
  195 +
  196 +
  197 +void fitnessFunction( float* sb, float* sw, float* lda, float* M, float* cM, size_t f, cudaDeviceProp props, size_t gen, std::ofstream& profilefile){
  198 +
  199 + size_t tP = 0; //total number of pixels
  200 + std::for_each(nP.begin(), nP.end(), [&] (size_t n) {
  201 + tP += n;
  202 + });
  203 + size_t nC = nP.size(); //total number of classes
  204 +
  205 + //--------------Compute between class scatter
  206 + // ga.cpu_computeSbSw(sb, sw, M, cM, nC, tP, nP, gen);
  207 + //
  208 + //if(debug){
  209 + // std::cout<<"cpu results of Sb sn Sw"<<std::endl;
  210 + // displayS(sb, ga.f); //display Sb
  211 + // displayS(sw, ga.f); //display Sw
  212 + // }
  213 +
  214 + //ga.callingGpuComputeSbSw(sb, sw, nC, tP, nP, props, gen);
  215 + ga.gpu_computeSbSw(sb, sw, nC, tP, props, gen, debug, profilefile);
  216 + //ga.cpu_computeSbSw(sb, sw, M, cM, nC, tP, nP);
  217 +
  218 + if(debug){
  219 + std::cout<<"From fitness function: gpu results of Sb sn Sw"<<std::endl;
  220 + displayS(sb, ga.f); //display Sb
  221 + displayS(sw, ga.f); //display Sw
  222 + std::cout<<std::endl;
  223 + }
  224 +
  225 + // ----------------------- Linear discriminant Analysis --------------------------------------
  226 + //timer.start();
  227 + //structure is created to pass variable to thread function as it accepts only 3 arguments
  228 + gnom.S = ga.S;
  229 + gnom.Sw = sw;
  230 + gnom.Sb = sb;
  231 + gnom.lda = lda;
  232 +
  233 + //calling function without using threads
  234 + for (size_t i = 0; i<ga.p; i++){
  235 + //calling function for eigencomputation
  236 + gpuComputeEignS(i, f);
  237 + //std::cout<<"Score["<<i<<"]: "<< ga.S[i]<<std::endl;
  238 + }
  239 +
  240 + //std::vector<std::thread> threads;
  241 + //for (size_t g = 0; g<ga.p; g++){
  242 + // //creating thread to do eigen computation
  243 + // threads.push_back(std::thread(gpuComputeEignS, g, f));
  244 + //}
  245 + //
  246 + //// loop again to join the threads
  247 + //for (auto& t : threads)
  248 + // t.join();
  249 +
  250 + const auto elapsed1 = timer.time_elapsed();
  251 + if(gen > ga.gnrtn - 2){
  252 + std::cout << "gpu_eigen time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
  253 + profilefile<< "gpu_eigen time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
  254 + }
  255 + //ga.S = gnom.score;
  256 + //size_t bestGnomeIdx = ga.sortSIndx()[0];
  257 +
  258 +}//end of fitness function
  259 +
  260 +void binaryclassifier(int classnum){
  261 + unsigned int* target = (unsigned int*) calloc(tP, sizeof(unsigned int));
  262 + memcpy(target, ga.T, tP * sizeof(unsigned int));
  263 + for(int i = 0 ; i < tP; i++){
  264 + if(target[i]==classnum){
  265 + ga.T[i] = 1;
  266 +
  267 + }else
  268 + ga.T[i] = 0;
  269 + }
  270 +}
  271 +
  272 +
  273 +
  274 +void advertisement() {
  275 + std::cout << std::endl;
  276 + std::cout << "=========================================================================" << std::endl;
  277 + std::cout << "Thank you for using the GA-GPU features selection for spectroscopic image!" << std::endl;
  278 + std::cout << "=========================================================================" << std::endl << std::endl;
  279 +// std::cout << args.str();
  280 +}
  281 +
  282 +int main(int argc, char* argv[]){
  283 +
  284 +//Add the argument options and set some of the default parameters
  285 + args.add("help", "print this help");
  286 + args.section("Genetic Algorithm");
  287 + args.add("features", "select features selection algorithm parameters","10", "number of features to be selected");
  288 + args.add("classes", "image masks used to specify classes", "", "class1.bmp class2.bmp class3.bmp");
  289 + args.add("population", "total number of feature subsets in puplation matrix", "1000");
  290 + args.add("generations", "number of generationsr", "50");
  291 + args.add("initial_guess", "initial guess of featues", "");
  292 + args.add("debug", "display intermediate data for debugging");
  293 + args.add("binary", "Select features for binary classes", "");
  294 + args.add("trim", "this gives wavenumber to use in trim option of siproc which trims all bands from envi file except gagpu selected bands");
  295 +
  296 + args.parse(argc,argv); //parse the command line arguments
  297 +
  298 +//Print the help text if set
  299 + if(args["help"].is_set()){ //display the help text if requested
  300 + advertisement();
  301 + std::cout<<std::endl<<"usage: ga-gpu input output --option [A B C ...]"<<std::endl;
  302 + std::cout<<std::endl<<std::endl;
  303 + std::cout<<args.str()<<std::endl;
  304 + exit(1);
  305 + }
  306 + if (args.nargs() < 2) { //if the user doesn't provide input and output files
  307 + std::cout << "ERROR: GA-GPU requires an input (ENVI) file and an output (features, text) file." << std::endl;
  308 + return 1;
  309 + }
  310 + if (args["classes"].nargs() < 2) { //if the user doesn't specify at least two class images
  311 + std::cout << "ERROR: GA-GPU requires at least two class images to be specified using the --classes option" << std::endl;
  312 + return 1;
  313 + }
  314 +
  315 + std::string outfile = args.arg(1); //outfile is text file where bnad index, LDA-basis, wavelength and if --trim option is set then trim wavelengths are set respectively
  316 + std::string profile_file = "profile_" + outfile ;
  317 + std::ofstream profilefile(profile_file.c_str(), std::ios::out); //open outfstream for outfile
  318 +
  319 + time_t t_start = time(NULL); //start a timer for file reading
  320 + E.open(args.arg(0), std::string(args.arg(0)) + ".hdr"); //open header file
  321 + size_t X = E.header.samples; //total number of pixels in X dimension
  322 + size_t Y = E.header.lines; //total number of pixels in Y dimension
  323 + B = (unsigned int)E.header.bands; //total number of bands (features)
  324 + std::vector<double> wavelengths = E.header.wavelength; //wavelengths of each band
  325 +
  326 + if(E.header.interleave != stim::envi_header::BIP){ //this code can only load bip files and hence check that in header file
  327 + std::cout<<"this code works for only bip files. please convert file to bip file"<<std::endl;
  328 + exit(1); //if file is not bip file exit code execution
  329 + }
  330 +
  331 +///--------------------------Load features---------------------------------------------
  332 + nP = ga_load_class_images(argc, args, &nC, &tP); //load supervised class images
  333 + ga.F = load_features( nC, tP, B, E, nP); //generate the feature matrix
  334 + ga.T = ga_load_responses(tP, nC, nP); //load the responses for RF training
  335 + E.close(); //close the hyperspectral file
  336 + time_t t_end = time(NULL);
  337 + std::cout<<"Total time: "<<t_end - t_start<<" s"<<std::endl;
  338 + //display_PixelfeatureNclass(ga.F, ga.T , B , 1); //Print one value from feature matrix to debug feature loading
  339 + //std::cout << "pixel target is " << ga.T[0] << " " << ga.T[1] << " " << ga.T[tP - 2] << " " << ga.T[tP - 1]<<std::endl;
  340 +///--------------------------Genetic algorith configurations with defult paramets and from argument values---------------------
  341 + ga.f = args["features"].as_int(0); //number of features to be selected by user default value is 10
  342 + ga.p = args["population"].as_int(0); //population size to be selected by user default value is 1000
  343 + ga.gnrtn = args["generations"].as_int(0); //number of generations to be selected by user default value is 50
  344 + if(args["binary"]) { //set this option when features are to be selected as binary clas features (class vs stroma)
  345 + binClassOne = args["binary"].as_int(0); //sel class number here, if 2 then features are selected for (class-2 vs stroma)
  346 + //feture selection for class selected by user with user arguments (make it binary class data by making chosen class label as 1 and al other class labels 0 from multiclass data )
  347 + //to select feature for all classes in joint class data using binary class system need to write a script with loop covering all classes
  348 + binaryclassifier(binClassOne);
  349 + } ///not fully implemented yet
  350 +
  351 + ga.ub = B; //upper bound is number of bands (i.e. size of z dimension) Note: for this particular application and way code is written lower bound is 0 and upper bound is size of z dimension
  352 + ga.uniformRate = 0.5; //uniform rate is used in crossover
  353 + ga.mutationRate = 0.5f; //in percentage for mutation operation on gnome
  354 + ga.tournamentSize = 5; //for crossover best parents are selected from tournament of gnomes
  355 + ga.elitism = true; // if it is true then best gnome of current generation is passed to next generation
  356 + //initial guess of population
  357 + ga.i_guess = (unsigned int*) calloc(ga.f, sizeof(unsigned int));
  358 + debug = args["debug"];
  359 +
  360 +//==================Generate intial population =================================================
  361 + std::vector<unsigned int> i_guess(ga.f);
  362 + for (size_t b = 0; b < ga.f; b++) //generate default initial guess
  363 + i_guess[b] = rand() % B + 0;
  364 +
  365 + if (args["initial_guess"].is_set()) {//if the user specifies the --initialguess option & provides feature indices as initial guess
  366 + size_t nf = args["initial_guess"].nargs(); //get the number of arguments after initial_guess
  367 + if (nf == 1 || nf == ga.f) { //check if file with initial guessed indices is given or direct indices are given as argument
  368 + if (nf == 1) { //if initial guessed feature indices are given in file
  369 + std::ifstream in; //open the file containing the baseline points
  370 + in.open(args["initial_guess"].as_string().c_str());
  371 + if (in.is_open()){ //if file is present and can be opened then read it
  372 + unsigned int b_ind;
  373 + while (in >> b_ind) //get each band index and push it into the vector
  374 + i_guess.push_back(b_ind);
  375 + }
  376 + else
  377 + std::cout << "cannot open file of initial_guess indices" << std::endl;
  378 + }
  379 + else if (nf == ga.f) { //if direct indices are given as argument
  380 + for (size_t b = 0; b < nf; b++) //for each band given by the user
  381 + i_guess[b] = args["initial_guess"].as_int(b); //store that band in the i_guess array
  382 + }
  383 + }
  384 + }
  385 +
  386 + ga.initialize_population(i_guess, debug); //initialize first population set for first generation, user can pass manually preferred features from command line
  387 + //display_gnome(0);
  388 +
  389 +//------------------Calculate class means and total mean of features----------------------------
  390 + float* M = (float*)calloc( B , sizeof(float)); //total mean of entire feature martix for all features (bands B)
  391 + ga.ttlMean(M, tP, B); //calculate total mean, ga.F is entire feature matrix, M is mean for all bands B(features)
  392 + if(debug) ga.dispalymean(M); //if option --debug is used display all bands mean
  393 +
  394 + //display band index of bands with mean zero, this indicates that band has all zero values
  395 + std::cout<<"Display features indices with zero mean "<<std::endl;
  396 + for(unsigned int i = 0; i < B; i++){
  397 + if(M[i]== 0)
  398 + std::cout<<"\t"<<i;
  399 + }
  400 + std::cout<<std::endl;
  401 +// std::cout << "pixel target is " << ga.T[0] << " " << ga.T[1] << " " << ga.T[tP - 2] << " " << ga.T[tP - 1]<<std::endl;
  402 + float* cM = (float*)calloc(nC * B , sizeof(float)); //cM is nC X B matrix with each row as mean of all samples in one class for all features (bands B)
  403 + ga.classMean(cM, tP, nC, B, nP); //calculate class mean, ga.F is entire feature matrix, M is mean for all bands B(features)
  404 + if(debug) ga.dispalyClassmean(cM, nC);
  405 +
  406 +//------------------------------------GPU init----------------------------------------------------
  407 + //checking for cuda device
  408 + int count;
  409 + HANDLE_ERROR(cudaGetDeviceCount(&count));
  410 + if(count < 1){
  411 + std::cout<<"no cuda device is available"<<std::endl;
  412 + return 1;
  413 + }
  414 + cudaDeviceProp props;
  415 + HANDLE_ERROR(cudaGetDeviceProperties(&props, 0));
  416 + ga.gpuInitializationfrommain(M, cM, nP, tP, nC);
  417 +
  418 +//feture selection for class selected by user with user arguments (make it binary class data by making chosen class label as 1 and al other class labels 0 from multiclass data )
  419 +//to select feature for all classes in joint class data using binary class system need to write a script with loop covering all classes
  420 + //if(binaryClass){
  421 + // binaryclassifier(binClassOne);
  422 + //}
  423 +
  424 +//============================= GA evolution by generations ====================================================
  425 + std::vector<unsigned int> bestgnome; //holds best gnome after each generation evaluation
  426 + size_t bestG_Indx; //This gives index of best gnome in the current population to get best gnome and its fitness value
  427 + unsigned int* newPop = (unsigned int*) calloc(ga.p * ga.f, sizeof(unsigned int)); //temprory storage of new population
  428 + double* best_S = (double*) calloc (ga.gnrtn, sizeof(double)); //stores fitness value of best gnome at each iteration
  429 + float* lda = (float*) calloc (ga.p * (nC-1) * ga.f, sizeof(float)); //stores LDA basis for each gnome so that we can have best gnome's LDA basis
  430 + float* sb = (float*) calloc( ga.p * ga.f * ga.f , sizeof(float)) ; //3d matrix for between class scatter (each 2d matrix between class scatter for one gnome)
  431 + float* sw = (float*) calloc( ga.p * ga.f * ga.f , sizeof(float)) ; //3d matrix for within class scatter (each 2d matrix within class scatter for one gnome)
  432 + ga.zerobandcheck(M, true); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean
  433 + ga.zerobandcheck(M, true); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results
  434 + time_t gpu_timestart = time(NULL); //start a timer for total evoluation
  435 +
  436 + for (size_t gen = 0; gen < ga.gnrtn; gen++){ //for each generation find fitness value of all gnomes in population matrix and generate population for next generation
  437 + //std::cout<<"Generation: "<<gen<<std::endl;
  438 + fitnessFunction(sb, sw, lda, M , cM, ga.f, props, gen, profilefile); //Evaluate phe(feature matrix for current population) for fitness of all gnomes in current population
  439 + timer.start(); //start timer for new population generation
  440 + bestG_Indx = ga.evolvePopulation(newPop, M, debug); //evolve population to generate new generation population
  441 + const auto pop_generation = timer.time_elapsed(); // end timer for new population generation
  442 + if(gen >ga.gnrtn -2){
  443 + std::cout << "population evolution time "<<std::chrono::duration_cast<std::chrono::microseconds>(pop_generation).count() << "us" << std::endl;
  444 + profilefile<<"population evolution time "<<std::chrono::duration_cast<std::chrono::microseconds>(pop_generation).count() << "us" <<std::endl;
  445 + }
  446 +
  447 + best_S[gen] = ga.S[bestG_Indx]; //score of best gnome in current generation
  448 + bestgnome = ga.getGnome(bestG_Indx); //Best gnome of current populaation
  449 + ga.generateNewP(newPop); //replace current population with new populaiton in the ga classs object
  450 + ga.zerobandcheck(M, false); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean
  451 + ga.zerobandcheck(M, false); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results
  452 + }//end generation
  453 +
  454 + time_t gpu_timeend = time(NULL); //end a timer for total evoluation
  455 + std::cout<<"Total gpu time: "<<gpu_timeend - gpu_timestart<<" s"<<std::endl;
  456 + profilefile<<"Total gpu time: "<<gpu_timeend - gpu_timestart<<" s"<<std::endl;
  457 +
  458 +//================================ Results of GA ===============================================================
  459 + std::cout<<"best gnome's fitness value is "<<best_S[ga.gnrtn-1]<<std::endl;
  460 + std::cout<<"best gnome is: ";
  461 + for(size_t i = 0; i < ga.f; i++){
  462 + std::cout<<" "<<(bestgnome.at(i));
  463 + }
  464 + std::cout<<std::endl;
  465 +
  466 + //create a text file to store the LDA stats (features subset and LDA-basis)
  467 + ////format of CSV file is: 1st row - band index, 2nd LDA basis depending on number of classes, 3rd - wavenumber corresponding to band index and it --trim is selected then trim wavnumbersare also given
  468 + std::ofstream csv(outfile.c_str(), std::ios::out); //open outfstream for outfile
  469 + size_t ldaindx = bestG_Indx * (nC-1) * ga.f ; //Compute LDA basis index of best gnome
  470 +
  471 + //if(binaryClass){ //this option is for binary class feature selection from joint classes but not fully implemented
  472 + // csv<<binClassOne<<std::endl;
  473 + //}
  474 +
  475 + //fitness values of best gnome is
  476 + csv<<"best gnome's fitness value is "<<best_S[ga.gnrtn-1]<<std::endl; //output fitness value of best gnome in last generation
  477 + //output gnome i.e. band index of selected featurs
  478 + csv<<(bestgnome.at(0)); //output feature subset
  479 + for(size_t i = 1; i < ga.f; i++)
  480 + csv<<","<<(bestgnome.at(i));
  481 + csv<<std::endl;
  482 +
  483 + //output LDA basis of size r X f, r is nC - 1 as LDA projection is rank limited by number of classes - 1
  484 + for (size_t i = 0; i < nC-1; i++){
  485 + csv<<lda[ldaindx + i * ga.f ];
  486 + for (size_t j = 1; j < ga.f; j++){
  487 + csv<<","<<lda[ldaindx + i * ga.f +j];
  488 + }
  489 + csv << std::endl;
  490 + }
  491 + //output actual wavelenths corresponding to those band indices
  492 + csv << (wavelengths[bestgnome.at(0)]);
  493 + for (size_t i = 1; i < ga.f; i++)
  494 + csv << "," << (wavelengths[bestgnome.at(i)]);
  495 + csv << std::endl;
  496 +
  497 +
  498 + if (args["trim"].is_set()) {
  499 + csv << "trim info" << std::endl;
  500 + std::sort(bestgnome.begin(), bestgnome.end()); //sort features index in best gnome
  501 +
  502 + std::vector<unsigned int> trimindex(ga.f); //create a vector to store temprory trim index bounds
  503 + std::vector<unsigned int> finaltrim_ind; //create a vector to store final trim index bounds
  504 + std::vector<unsigned int> trim_wv; //create a vector to store final trim wavelength bounds
  505 +
  506 + //trim index
  507 + trimindex.push_back(1); //1st trimming band index is 1
  508 + for (size_t i = 0; i < ga.f; i++) { // for each feature find its bound indexes
  509 + trimindex[i * 2] = bestgnome.at(i) - 1;
  510 + trimindex[i * 2 + 1] = bestgnome.at(i) + 1;
  511 + }
  512 + trimindex.push_back(B); //last bound index is B
  513 +
  514 + //organize trim index
  515 + int k = 0;
  516 + for (size_t i = 0; i < ga.f + 1; i++) { // find valid pair of trim indices bound excluding adjacent trim indices
  517 + if (trimindex[2 * i] < trimindex[2 * i + 1]) {
  518 + finaltrim_ind.push_back(trimindex[2 * i]); //this is left bound
  519 + finaltrim_ind.push_back(trimindex[2 * i + 1]);
  520 + k = k + 2;
  521 + }
  522 + }
  523 + //add duplicated trim indices as single index to final trim index
  524 + for (size_t i = 0; i < ga.f + 1; i++) { //check each pair of trim indices for duplications
  525 + if (trimindex[2 * i] == trimindex[2 * i + 1]) { // (duplication caused due to adjacent features)
  526 + finaltrim_ind.push_back(trimindex[2 * i]); // remove duplicated trim indices replace by one
  527 + k = k + 1;
  528 + }
  529 + }
  530 +
  531 +
  532 + ////output actual wavelenths corresponding to those trim indices
  533 + ////these wavenumber are grouped in pairs, check each pair, if duplicated numbers are there in pair delete one and keep other as band to trim, if 2nd wavnumber is smaller than 1st in a pair ignore that pair
  534 + ////e.g [1, 228, 230, 230, 232, 350,352, 351, 353, 1200] pairas [1(start)-228,230-230, 232-350, 352-351, 353-1200(end)], trimming wavenumbers are [1-228, 230, 233-350, 353-1200]
  535 + csv << (wavelengths[finaltrim_ind.at(0)]);
  536 + for (size_t i = 1; i < ga.f * + 2 ; i++)
  537 + csv << "," << (wavelengths[finaltrim_ind.at(i)]);
  538 + csv << std::endl;
  539 + } //end trim option
  540 +
  541 +
  542 + //free gpu pointers
  543 + ga.gpu_Destroy();
  544 +
  545 + //free pointers
  546 + std::free(sb);
  547 + std::free(sw);
  548 + std::free(M);
  549 + std::free(cM);
  550 + std::free(best_S);
  551 + std::free(lda);
  552 + std::free(newPop);
  553 +
  554 +}//end main
  555 +
  556 +
src/timer.h 0 → 100644
  1 +++ a/src/timer.h
  1 +#ifndef GA_GPU_TIMER_H
  2 +#define GA_GPU_TIMER_H
  3 +
  4 +#include <chrono>
  5 +//#include <thread>
  6 +//#include <iostream>
  7 +
  8 +//using namespace std::chrono;
  9 +
  10 +class Timer {
  11 +typedef std::chrono::high_resolution_clock Clock;
  12 +
  13 +Clock::time_point epoch;
  14 +public:
  15 + void start(){
  16 + epoch = Clock::now();
  17 + }
  18 + Clock::duration time_elapsed() const{
  19 + return Clock::now() - epoch;
  20 + }
  21 +};
  22 +
  23 +#endif
  24 +
  25 +
  26 +//class Timer {
  27 +// std::chrono::time_point<std::chrono::high_resolution_clock> epoch;
  28 +//
  29 +// public:
  30 +// typedef high_resolution_clock Clock;
  31 +// void start(){
  32 +// epoch = Clock::now();
  33 +// }
  34 +// std::chrono::time_point<std::chrono::high_resolution_clock> time_elapsed() const {
  35 +// return Clock::now() - epoch;
  36 +// }
  37 +//};
0 \ No newline at end of file 38 \ No newline at end of file