Commit 0d84034253937b68843ec545a0050dd6abaccf5c

Authored by David Mayerich
0 parents

public release commit

CMakeLists.txt 0 → 100644
  1 +++ a/CMakeLists.txt
# Minimum CMake version needed to configure this project.
cmake_minimum_required(VERSION 2.8)

# Top-level project name.
project(ga-gpu)

# Let find_package() pick up the Find*.cmake modules kept at the top of the source tree.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}")

# Fall back to a Release build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()
  14 +
# On MS Visual Studio, place the Debug and Release executables in OUTPUT_DIRECTORY.
# OUTPUT_DIRECTORY may be supplied by the user (e.g. -DOUTPUT_DIRECTORY=<dir>); when it
# is not set it defaults to the top of the build tree, so the two output-directory
# variables below are never set to an empty string.
if(MSVC)
  if(NOT OUTPUT_DIRECTORY)
    set(OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
  endif()
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}")
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}")
endif()

# With GCC, compile as C++11 and define _FORCE_INLINES. The flags are appended so that
# anything the user passed through CMAKE_CXX_FLAGS is preserved rather than overwritten.
if(CMAKE_COMPILER_IS_GNUCC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_FORCE_INLINES")
endif()

# Generate code for compute capability 5.0 (PTX plus sm_50 binary).
# FindCUDA documents CUDA_NVCC_FLAGS as a semicolon-delimited list, so every nvcc
# argument is added as its own list element.
# NOTE(review): this runs before find_package(CUDA); confirm the value survives the
# module's own cache definition of CUDA_NVCC_FLAGS on a first configure.
list(APPEND CUDA_NVCC_FLAGS
  "--gpu-architecture=compute_50"
  "--gpu-code=sm_50,compute_50"
)
  30 +
  31 +
  32 +
# ---- External dependencies ---------------------------------------------------

# OpenCV is mandatory; USING_OPENCV is defined for everything built in this directory.
find_package(OpenCV REQUIRED)
add_definitions(-DUSING_OPENCV)

# Threading library (provides CMAKE_THREAD_LIBS_INIT for the link line).
find_package(Threads)

# X11 (provides X11_LIBRARIES for the link line).
find_package(X11)

# STIM library, located through the FindSTIM.cmake module shipped with this project.
find_package(STIM)

# CUDA toolkit (cuda_add_executable, cuBLAS and cuFFT libraries).
find_package(CUDA REQUIRED)

# Boost.Filesystem and Boost.System are required only when building with GCC.
if(CMAKE_COMPILER_IS_GNUCC)
  find_package(Boost COMPONENTS filesystem system)
  if(Boost_FOUND)
    include_directories(${Boost_INCLUDE_DIRS})
  else()
    message(FATAL_ERROR "ga-gpu requires Boost::filesystem and Boost::system when using GCC")
  endif()
endif()

# LAPACKE and its supporting libraries, located through FindLAPACKE.cmake.
find_package(LAPACKE REQUIRED)
  73 +
# Header search path for every target in this directory. OpenGL and FANN are not
# searched for in this file, so their variables expand to nothing unless set elsewhere.
include_directories(
  ${CUDA_INCLUDE_DIRS}
  ${OpenCV_INCLUDE_DIRS}
  ${LAPACKE_INCLUDE_DIR}
  ${STIM_INCLUDE_DIRS}
  ${OpenGL_INCLUDE_DIRS}
  ${FANN_INCLUDE_DIRS}
  "${CMAKE_SOURCE_DIR}/src"
)
include_directories("${CMAKE_SOURCE_DIR}/src")

# Headers that belong to the project (listed on the executable below).
file(GLOB GAGPU_H "${CMAKE_SOURCE_DIR}/src/*.h")

# Source files of the executable: everything in src/ whose extension starts with "c".
file(GLOB GA_GPU_SRC "${CMAKE_SOURCE_DIR}/src/*.c*")
  94 +
  95 +
# Build the ga-gpu executable from the globbed headers and sources using FindCUDA's
# cuda_add_executable().
cuda_add_executable(ga-gpu ${GAGPU_H} ${GA_GPU_SRC})

# Libraries linked into ga-gpu: CUDA runtime, cuBLAS, cuFFT, LAPACKE/LAPACK/BLAS,
# threads, X11 and OpenCV.
target_link_libraries(ga-gpu
  ${CUDA_LIBRARIES}
  ${CUDA_CUBLAS_LIBRARIES}
  ${CUDA_CUFFT_LIBRARIES}
  ${LAPACKE_LIBRARIES}
  ${LAPACK_LIBRARIES}
  ${BLAS_LIBRARIES}
  ${CMAKE_THREAD_LIBS_INIT}
  ${X11_LIBRARIES}
  ${OpenCV_LIBS}
)
  112 +
  113 +
  114 +#create the PROC executable----------------------------------------------
  115 +
  116 +#create an executable file
  117 +#add_executable(hsiga
  118 +# ${GACPU_H}
  119 +# ${GA_H}
  120 +# ${GA_CPU_SRC}
  121 +#)
  122 +#target_link_libraries(hsiga ${LAPACKE_LIBRARIES}
  123 +# ${LAPACK_LIBRARIES}
  124 +# ${BLAS_LIBRARIES}
  125 +# ${CMAKE_THREAD_LIBS_INIT}
  126 +# ${X11_LIBRARIES}
  127 +# ${OpenCV_LIBS}
  128 +#)
  129 +
  130 +
  131 +
# Link Boost.Filesystem and Boost.System into ga-gpu when Boost.Filesystem was found.
# FindBoost documents its per-component result as Boost_<COMPONENT>_LIBRARY, so that
# spelling is used for both components.
if(Boost_FILESYSTEM_FOUND)
  target_link_libraries(ga-gpu
    ${Boost_FILESYSTEM_LIBRARY}
    ${Boost_SYSTEM_LIBRARY}
  )
endif()
... ...
FindGLEW.cmake 0 → 100644
  1 +++ a/FindGLEW.cmake
  1 +# Copyright (c) 2012-2016 DreamWorks Animation LLC
  2 +#
  3 +# All rights reserved. This software is distributed under the
  4 +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ )
  5 +#
  6 +# Redistributions of source code must retain the above copyright
  7 +# and license notice and the following restrictions and disclaimer.
  8 +#
  9 +# * Neither the name of DreamWorks Animation nor the names of
  10 +# its contributors may be used to endorse or promote products derived
  11 +# from this software without specific prior written permission.
  12 +#
  13 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  14 +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  15 +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  16 +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  17 +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL,
  18 +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  19 +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20 +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21 +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22 +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  23 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24 +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE
  25 +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00.
  26 +#
  27 +
  28 +#-*-cmake-*-
  29 +# - Find GLEW
  30 +#
  31 +# Author : Nicholas Yue yue.nicholas@gmail.com
  32 +#
# This auxiliary CMake file helps to find the GLEW headers and libraries
#
# GLEW_FOUND            set if GLEW is found.
# GLEW_INCLUDE_DIR      GLEW's include directory
# GLEW_GLEW_LIBRARY     GLEW library
# GLEW_GLEWmx_LIBRARY   GLEWmx library (Multiple Rendering Contexts)
  39 +
# Load find_package_handle_standard_args(). Helper modules are loaded with include();
# find_package() is meant for locating packages.
include(FindPackageHandleStandardArgs)

# GLEW is searched for only below the directory named by the GLEW_ROOT environment
# variable; default and system-environment search paths are disabled.
find_path(GLEW_LOCATION include/GL/glew.h
  "$ENV{GLEW_ROOT}"
  NO_DEFAULT_PATH
  NO_SYSTEM_ENVIRONMENT_PATH
)

# GLEW_FOUND depends on the header location only, not on the libraries found below.
find_package_handle_standard_args(GLEW
  REQUIRED_VARS GLEW_LOCATION
)

if(GLEW_LOCATION)

  set(GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path")

  # The library suffix list is overridden per platform to pick static or shared
  # libraries; the original list is saved here and restored afterwards.
  set(ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
  if(GLEW_USE_STATIC_LIBS)
    if(APPLE)
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
      find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    elseif(WIN32)
      # Static link libraries
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
      find_library(GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib)
      find_library(GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib)
    else()
      # Other Unix-like systems
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
      find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    endif()
  else()
    if(APPLE)
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib")
      find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib)
      find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib)
    elseif(WIN32)
      # Import (link) libraries
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
      find_library(GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib)
      find_library(GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib)
      # Runtime (load) libraries
      set(CMAKE_FIND_LIBRARY_SUFFIXES ".dll")
      find_library(GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    else()
      # Other Unix-like systems: the default suffix list is left untouched
      find_library(GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
      find_library(GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib
        NO_DEFAULT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
      )
    endif()
  endif()
  # MUST reset the suffix list so later find_library() calls behave normally
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES})

  set(GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library")
  set(GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library")

endif()
... ...
FindGLUT.cmake 0 → 100644
  1 +++ a/FindGLUT.cmake
  1 +#.rst:
  2 +# FindGLUT
  3 +# --------
  4 +#
  5 +# try to find glut library and include files.
  6 +#
  7 +# IMPORTED Targets
  8 +# ^^^^^^^^^^^^^^^^
  9 +#
  10 +# This module defines the :prop_tgt:`IMPORTED` targets:
  11 +#
  12 +# ``GLUT::GLUT``
  13 +# Defined if the system has GLUT.
  14 +#
  15 +# Result Variables
  16 +# ^^^^^^^^^^^^^^^^
  17 +#
  18 +# This module sets the following variables:
  19 +#
  20 +# ::
  21 +#
  22 +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc.
  23 +# GLUT_LIBRARIES, the libraries to link against
  24 +# GLUT_FOUND, If false, do not try to use GLUT.
  25 +#
  26 +# Also defined, but not for general use are:
  27 +#
  28 +# ::
  29 +#
  30 +# GLUT_glut_LIBRARY = the full path to the glut library.
  31 +# GLUT_Xmu_LIBRARY = the full path to the Xmu library.
  32 +# GLUT_Xi_LIBRARY = the full path to the Xi Library.
  33 +
  34 +#=============================================================================
  35 +# Copyright 2001-2009 Kitware, Inc.
  36 +#
  37 +# Distributed under the OSI-approved BSD License (the "License");
  38 +# see accompanying file Copyright.txt for details.
  39 +#
  40 +# This software is distributed WITHOUT ANY WARRANTY; without even the
  41 +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  42 +# See the License for more information.
  43 +#=============================================================================
  44 +# (To distribute this file outside of CMake, substitute the full
  45 +# License text for the above reference.)
  46 +
# Platform-specific search for the GLUT header directory and library.
if(WIN32)
  # Windows: look below the directory named by the GLUT_ROOT_PATH environment variable.
  find_path(GLUT_INCLUDE_DIR NAMES GL/glut.h
    PATHS $ENV{GLUT_ROOT_PATH}/include)

  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    # 64-bit build: only freeglut from the lib/x64 directory is accepted.
    find_library(GLUT_glut_LIBRARY NAMES freeglut
      PATHS $ENV{GLUT_ROOT_PATH}/lib/x64
      NO_DEFAULT_PATH)
  else()
    find_library(GLUT_glut_LIBRARY NAMES glut glut32 freeglut
      PATHS
        ${OPENGL_LIBRARY_DIR}
        $ENV{GLUT_ROOT_PATH}/lib)
  endif()

elseif(APPLE)
  find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR})
  find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX")
  find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX")

  if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa)
    add_library(GLUT::Cocoa UNKNOWN IMPORTED)
    # Cocoa should always be a framework; when it is, point at the binary inside it.
    if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$")
      set_target_properties(GLUT::Cocoa PROPERTIES
        IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}")
    else()
      set_target_properties(GLUT::Cocoa PROPERTIES
        IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}")
    endif()
  endif()

else()
  if(BEOS)
    # BeOS: extra fixed search locations for the header and the library.
    set(_GLUT_INC_DIR /boot/develop/headers/os/opengl)
    set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86)
  else()
    # Other systems: also locate Xi and Xmu and expose them as imported targets.
    find_library(GLUT_Xi_LIBRARY Xi
      /usr/openwin/lib)
    find_library(GLUT_Xmu_LIBRARY Xmu
      /usr/openwin/lib)

    if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi)
      add_library(GLUT::Xi UNKNOWN IMPORTED)
      set_target_properties(GLUT::Xi PROPERTIES
        IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}")
    endif()

    if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu)
      add_library(GLUT::Xmu UNKNOWN IMPORTED)
      set_target_properties(GLUT::Xmu PROPERTIES
        IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}")
    endif()
  endif()

  find_path(GLUT_INCLUDE_DIR GL/glut.h
    /usr/include/GL
    /usr/openwin/share/include
    /usr/openwin/include
    /opt/graphics/OpenGL/include
    /opt/graphics/OpenGL/contrib/libglut
    ${_GLUT_INC_DIR}
  )

  find_library(GLUT_glut_LIBRARY glut
    /usr/openwin/lib
    ${_GLUT_glut_LIB_DIR}
  )

  # The helper variables are only meaningful during the two searches above.
  unset(_GLUT_INC_DIR)
  unset(_GLUT_glut_LIB_DIR)
endif()
  135 +
# find_package_handle_standard_args() is defined by this helper module; include it here
# so the module does not depend on some other find module having loaded it first.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR)

if(GLUT_FOUND)
  # Is -lXi and -lXmu required on all platforms that have it?
  # If not, we need some way to figure out what platform we are on.
  set(GLUT_LIBRARIES
    ${GLUT_glut_LIBRARY}
    ${GLUT_Xmu_LIBRARY}
    ${GLUT_Xi_LIBRARY}
    ${GLUT_cocoa_LIBRARY}
  )

  # Imported target carrying the include directory and the helper-library dependencies.
  if(NOT TARGET GLUT::GLUT)
    add_library(GLUT::GLUT UNKNOWN IMPORTED)
    set_target_properties(GLUT::GLUT PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}")
    # For a framework, the imported location is the binary inside the framework directory.
    if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$")
      set_target_properties(GLUT::GLUT PROPERTIES
        IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}")
    else()
      set_target_properties(GLUT::GLUT PROPERTIES
        IMPORTED_LOCATION "${GLUT_glut_LIBRARY}")
    endif()

    if(TARGET GLUT::Xmu)
      set_property(TARGET GLUT::GLUT APPEND
        PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu)
    endif()

    if(TARGET GLUT::Xi)
      set_property(TARGET GLUT::GLUT APPEND
        PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi)
    endif()

    if(TARGET GLUT::Cocoa)
      set_property(TARGET GLUT::GLUT APPEND
        PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa)
    endif()
  endif()

  # The following deprecated settings are for backwards compatibility with CMake 1.4
  set(GLUT_LIBRARY ${GLUT_LIBRARIES})
  set(GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR})
endif()

mark_as_advanced(
  GLUT_INCLUDE_DIR
  GLUT_glut_LIBRARY
  GLUT_Xmu_LIBRARY
  GLUT_Xi_LIBRARY
)
... ...
FindLAPACKE.cmake 0 → 100644
  1 +++ a/FindLAPACKE.cmake
# - Try to find LAPACKE
#
# Once done this will define
#  LAPACKE_FOUND        - System has LAPACKE (header and library were located)
#  LAPACKE_INCLUDE_DIR  - Directory containing lapacke.h
#  LAPACKE_INCLUDE_DIRS - Same directory, under the conventional plural name
#  LAPACKE_LIBRARIES    - The LAPACKE library followed by the LAPACK and BLAS
#                         libraries that were located
#
# Search hints: the LAPACK_PATH environment variable and the INCLUDE_INSTALL_DIR /
# LIB_INSTALL_DIR variables.
#
# Usually, LAPACKE requires LAPACK and the BLAS. LAPACKE_FOUND itself depends only
# on the LAPACKE header and library.

find_path(LAPACKE_INCLUDE_DIR
  NAMES lapacke.h
  PATHS $ENV{LAPACK_PATH} ${INCLUDE_INSTALL_DIR}
  PATHS ENV INCLUDE)

find_library(LAPACKE_LIBRARY
  NAMES liblapacke lapacke
  PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
  PATHS ENV LIBRARY_PATH
  PATHS ENV LD_LIBRARY_PATH)

set(LAPACKE_LIBRARIES ${LAPACKE_LIBRARY})

if(MSVC)
  # On MSVC, LAPACK and BLAS are looked up in the same hint locations as LAPACKE.
  find_library(LAPACK_LIBRARY
    NAMES liblapack lapack
    PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
    PATHS ENV LIBRARY_PATH
    PATHS ENV LD_LIBRARY_PATH)

  find_library(BLAS_LIBRARY
    NAMES libblas blas
    PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR}
    PATHS ENV LIBRARY_PATH
    PATHS ENV LD_LIBRARY_PATH)

  # Only append what was actually found, so no *-NOTFOUND value reaches the link line.
  if(LAPACK_LIBRARY)
    list(APPEND LAPACKE_LIBRARIES ${LAPACK_LIBRARY})
  endif()
  if(BLAS_LIBRARY)
    list(APPEND LAPACKE_LIBRARIES ${BLAS_LIBRARY})
  endif()
  mark_as_advanced(LAPACK_LIBRARY BLAS_LIBRARY)
else()
  # Elsewhere, use CMake's own FindLAPACK / FindBLAS modules. They define
  # LAPACK_LIBRARIES and BLAS_LIBRARIES. QUIET and REQUIRED are forwarded from the
  # find_package(LAPACKE ...) call that invoked this module.
  set(_lapacke_dep_args)
  if(LAPACKE_FIND_QUIETLY)
    list(APPEND _lapacke_dep_args QUIET)
  endif()
  if(LAPACKE_FIND_REQUIRED)
    list(APPEND _lapacke_dep_args REQUIRED)
  endif()
  find_package(LAPACK ${_lapacke_dep_args})
  find_package(BLAS ${_lapacke_dep_args})
  unset(_lapacke_dep_args)

  if(LAPACK_FOUND)
    list(APPEND LAPACKE_LIBRARIES ${LAPACK_LIBRARIES})
  endif()
  if(BLAS_FOUND)
    list(APPEND LAPACKE_LIBRARIES ${BLAS_LIBRARIES})
  endif()
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LAPACKE DEFAULT_MSG
  LAPACKE_LIBRARY
  LAPACKE_INCLUDE_DIR)

if(LAPACKE_FOUND)
  set(LAPACKE_INCLUDE_DIRS ${LAPACKE_INCLUDE_DIR})
endif()

mark_as_advanced(LAPACKE_INCLUDE_DIR LAPACKE_LIBRARY)
... ...
FindSTIM.cmake 0 → 100644
  1 +++ a/FindSTIM.cmake
# Finds the STIM library headers.
# Set the STIMLIB_PATH environment variable to the directory containing the stim
# subdirectory (the stim repository).
#
# Defines:
#  STIM_FOUND        - true when STIMLIB_PATH provided a location
#  STIM_INCLUDE_DIR  - value of the STIMLIB_PATH environment variable
#  STIM_INCLUDE_DIRS - same value, set only when STIM was found

include(FindPackageHandleStandardArgs)

set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH})

# Print the hint before the standard-args check: for a REQUIRED package that check
# stops the configure run, so anything placed after it would never be shown.
if(NOT STIM_INCLUDE_DIR AND NOT STIM_FIND_QUIETLY)
  message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.")
  message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib")
endif()

find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR)

if(STIM_FOUND)
  set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR})
endif()
... ...
src/basic_functions.h 0 → 100644
  1 +++ a/src/basic_functions.h
  1 +#include <stdio.h>
  2 +
  3 +
/// Build an index array that orders `input` from the LARGEST value to the smallest.
/// @param input is the array of values to rank (it is not modified)
/// @param size is the number of elements in input
/// @return a malloc'ed array of `size` indices such that input[idx[0]] >= input[idx[1]] >= ...;
///         the caller owns it and must free() it. Returns NULL if the allocation fails.
/// NOTE(review): this header does not include <stdlib.h> or a header declaring std::swap
/// itself; it appears to rely on the including file for them - confirm.
size_t* sortIndx(float* input, size_t size){
	size_t* idx = (size_t*) malloc (size * sizeof (size_t));
	if (idx == NULL)
		return NULL;								//allocation failed: nothing to sort into

	for (size_t i = 0; i < size; i++)				//start from the identity permutation
		idx[i] = i;

	//exchange sort on the indices: after pass i, idx[i] refers to the largest remaining value
	for (size_t i = 0; i < size; i++){
		for (size_t j = i + 1; j < size; j++){
			if (input[idx[i]] < input[idx[j]]){
				std::swap (idx[i], idx[j]);
			}
		}
	}
	return idx;										//use as sortSIdx in selection
}
  20 +
  21 +
/// Multiply two row-major matrices: M3 = M1 * M2.
/// @param M3 is the output matrix (r1 x c2); it must have room for r1 * c2 elements
/// @param M1 is the left operand (r1 x c1), element (i,k) stored at M1[i * c1 + k]
/// @param M2 is the right operand (r2 x c2), element (k,j) stored at M2[k * c2 + j]
/// @param r1 is the number of rows of M1
/// @param c1 is the number of columns of M1 (the inner dimension of the product)
/// @param r2 is the number of rows of M2; it is not read, the inner dimension is taken from c1
/// @param c2 is the number of columns of M2
template<typename T>
void mtxMul(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
	for(size_t i = 0 ; i <r1; i++){
		for(size_t j = 0; j< c2; j++){
			T temp = 0;
			for(size_t k = 0; k < c1 ; k++){						//column1 = row2 for matrix multiplication
				temp+= M1[i * c1 + k] * M2[k * c2 + j];				//accumulate one element of the output matrix
			}
			M3[i * c2 + j] = temp;									//the output has c2 columns, so its row stride is c2
		}
	}
}
  35 +
/// Multiply a row-major matrix by the transpose of another: M3 = M1 * transpose(M2).
/// @param M3 is the output matrix (r1 x r2); it must have room for r1 * r2 elements
/// @param M1 is the left operand (r1 x c1), element (i,k) stored at M1[i * c1 + k]
/// @param M2 is the right operand before transposition (r2 x c2), element (j,k) stored at M2[j * c2 + k]
/// @param r1 is the number of rows of M1
/// @param c1 is the number of columns of M1 (the inner dimension of the product)
/// @param r2 is the number of rows of M2, i.e. the number of columns of the output
/// @param c2 is the number of columns of M2 (used as its row stride)
template<typename T>
void mtxMultranspose(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){
	for(size_t i = 0 ; i <r1; i++){
		for(size_t j = 0; j< r2; j++){
			T temp = 0;
			for(size_t k = 0; k < c1 ; k++){						//dot product of row i of M1 and row j of M2
				temp+= M1[i * c1 + k] * M2[j * c2 + k];
			}
			M3[i * r2 + j] = temp;									//the output has r2 columns, so its row stride is r2
		}
	}
}
  49 +
/// Print the first f x f matrix stored in sw to standard output.
/// Output is a blank line, then one line per matrix row with every element followed
/// by a space, then a closing blank line.
/// @param sw points to at least f * f elements, element (row, col) at sw[row * f + col]
/// @param f is the number of rows and columns of the matrix
template<typename T>
void displayS(T* sw, size_t f){
	std::cout<<std::endl;
	for(size_t row = 0; row < f; row++){
		const T* line = sw + row * f;					//start of the current matrix row
		for(size_t col = 0; col < f; col++)
			std::cout<<line[col]<<" ";
		std::cout<<std::endl;
	}
	std::cout<<std::endl;
}
  65 +
/// Build an index array that orders eigenvalues from the SMALLEST to the largest.
/// When N > 0, the value at the front of the ordering (the smallest one) is printed
/// to standard output.
/// @param eigenvalue is the array of eigenvalues to rank (it is not modified)
/// @param N is the number of eigenvalues
/// @return a malloc'ed array of N indices such that eigenvalue[idx[0]] <= eigenvalue[idx[1]] <= ...;
///         the caller owns it and must free() it. Returns NULL if the allocation fails.
size_t* sortEigenVectorIndx(float* eigenvalue, size_t N){
	size_t *idx = (size_t*) malloc (N * sizeof (size_t));
	if (idx == NULL)
		return NULL;								//allocation failed: nothing to sort into

	for (size_t i = 0; i < N; i++)					//start from the identity permutation
		idx[i] = i;

	//exchange sort on the indices: after pass i, idx[i] refers to the smallest remaining value
	for (size_t i = 0; i < N; i++){
		for (size_t j = i + 1; j < N; j++){
			if (eigenvalue[idx[i]] > eigenvalue[idx[j]]){
				std::swap (idx[i], idx[j]);
			}
		}
	}

	//idx[0] only exists for a non-empty input
	if (N > 0)
		std::cout<<"best eigenvalue index: "<<eigenvalue[idx[0]]<<std::endl;

	return idx;
}
... ...
src/enviload.h 0 → 100644
  1 +++ a/src/enviload.h
  1 +#include <iostream>
  2 +#include <fstream>
  3 +#include <thread>
  4 +#include <random>
  5 +#include <vector>
  6 +//#include <algorithm>
  7 +
  8 +#define NOMINMAX
  9 +
  10 +//stim libraries
  11 +#include <stim/envi/envi.h>
  12 +#include <stim/image/image.h>
  13 +#include <stim/parser/arguments.h>
  14 +#include <stim/ui/progressbar.h>
  15 +#include <stim/parser/filename.h>
  16 +//#include <stim/visualization/colormap.h>
  17 +#include <stim/parser/table.h>
  18 +
  19 +std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
  20 +//loads spectral features into a feature matrix based on a set of class images (or masks)
  21 +float* load_features(size_t nC, size_t tP, size_t B, stim::envi E, std::vector< unsigned int > nP){
  22 + float progress = 0; //initialize the progress bar variable
  23 + unsigned long long bytes_fmat = sizeof(float) * tP * B; //calculate the number of bytes in the feature matrix
  24 + std::cout<<"totalnumber of samples "<<tP<<std::endl;
  25 + std::cout<<"Allocating space for the feature matrix: "<<tP<<" x "<<B<<" = "<<(float)bytes_fmat/(float)1048576<<"MB"<<std::endl;
  26 + float* F = (float*) malloc(bytes_fmat); //allocate space for the sifted matrix
  27 + std::cout<<"Loading Training Data ("<<nC<<" classes)"<<std::endl;
  28 + //load all of the training spectra into an array
  29 + unsigned long long F_idx = 0; //initialize the matrix index to 0
  30 + //unsigned long long R_idx = 0;
  31 + for(unsigned c = 0; c < nC; c++){ //for each class image
  32 + std::cout<<"\tSifting class "<<c+1<<" = "<<nP[c]<<" pixels..."<<std::endl;
  33 + // std::thread t1 = std::thread(progress_thread_envi, &E); //start the progress bar thread
  34 + E.sift((void*)&F[F_idx], C[c].data(), true); //sift that class into the matrix at the proper location
  35 + F_idx += nP[c] * B;
  36 + progress = (float)(c+1) / (float)nC * 100;
  37 + // t1.join();
  38 + }
  39 +
  40 + return F;
  41 +}
  42 +
/// Build the class-label (response) array that matches the rows of the feature matrix.
/// The first nP[0] entries receive label 1, the next nP[1] entries label 2, and so on.
/// @param tP is the total number of pixels, i.e. the length of the returned array
/// @param nC is the number of classes
/// @param nP holds the number of pixels in each class, nP[c] for class c
/// @return a malloc'ed array of tP labels; the caller owns it and must free() it.
///         Entries beyond the sum of the class sizes are left uninitialized.
///         Returns NULL if the allocation fails.
unsigned int* ga_load_responses(size_t tP, size_t nC, std::vector< unsigned int > nP){
	unsigned int* T = (unsigned int*)malloc(tP*sizeof(unsigned int));
	if(T == NULL)
		return NULL;													//allocation failed (or nothing to allocate)

	size_t R_idx = 0;													//next free position in the response array
	for(size_t c = 0; c < nC && c < nP.size(); c++){					//never read past the per-class pixel counts
		for(unsigned long long l = 0; l < nP[c] && R_idx < tP; l++){	//never write past the tP allocated labels
			T[R_idx] = (unsigned int)c+1;								//labels are 1-based
			R_idx++;
		}
	}
	return T;
}
  55 +
  56 +
  57 +//loads the necessary data for training a random forest classifier
  58 +std::vector< unsigned int > ga_load_class_images(int argc, stim::arglist args, size_t* nC, size_t* tP){
  59 + if(args["classes"].nargs() < 2){ //if fewer than two classes are specified, there's a problem
  60 + std::cout<<"ERROR: training requires at least two class masks"<<std::endl;
  61 + exit(1);
  62 + }
  63 + std::vector< unsigned int > nP;
  64 + size_t num_images = args["classes"].nargs(); //count the number of class images
  65 + //size_t num_images = args["rf"].nargs(); //count the number of class images
  66 + //std::vector<std::string> filenames(num_images); //initialize an array of file names to store the names of the images
  67 + std::string filename; //allocate space to store the filename for an image
  68 + for(size_t c = 0; c < num_images; c++){ //for each image
  69 + filename = args["classes"].as_string(c);; //get the class image file name
  70 + stim::image<unsigned char> image(filename); //load the image
  71 + //push_training_image(image.channel(0), nC, tP, nP); //push channel zero (all class images are assumed to be single channel)
  72 + C.push_back(image.channel(0));
  73 + unsigned int npixels = (unsigned int)image.channel(0).nnz();
  74 + nP.push_back(npixels); //push the number of pixels onto the pixel array
  75 + *tP += npixels; //add to the running total of pixels
  76 + *nC = *nC + 1;
  77 + }
  78 +
  79 + return nP;
  80 +}
  81 +
/// Debug helper: print the class label and all B feature values of pixel Idx.
/// @param F is the feature matrix, pixel Idx occupying F[Idx * B] .. F[Idx * B + B - 1]
/// @param T is the class label array, one label per pixel
/// @param B is the number of features per pixel
/// @param Idx is the index of the pixel to show
void display_PixelfeatureNclass(float* F, unsigned int* T, size_t B, size_t Idx){
	const float* pixel = F + Idx * B;								//first feature of the requested pixel
	std::cout<<"class of pixel["<<Idx<<"]" <<"is: "<<T[Idx]<<std::endl;
	std::cout<<"feature["<<Idx<<"] is: "<<std::endl;
	for(const float* value = pixel; value != pixel + B; ++value)
		std::cout<<" "<<*value;										//values are printed on one line, no trailing newline
}
  89 +
  90 +
  91 +void display_args(int argc, stim::arglist args){
  92 + std::cout<<"number of arguments "<<argc<<std::endl;
  93 + std::cout<<"arg 0 "<<args.arg(0)<<std::endl;
  94 + std::cout<<"arg 1 "<<args.arg(1)<<std::endl;
  95 +}
  96 +
/// Debug helper: print the number of samples (X * Y) and the number of bands.
/// @param X is the first spatial dimension
/// @param Y is the second spatial dimension
/// @param B is the number of bands
void display_dataSize(size_t X, size_t Y, size_t B){
	const size_t samples = X * Y;
	std::cout<<"number of samples "<<samples<<std::endl;
	std::cout<<"number of bands "<<B<<std::endl;
}
  102 +
/// Debug helper: print pixel j of the feature matrix built for gnome i.
/// One line is printed per feature: the feature index stored in the gnome, a space,
/// and the corresponding value of the pixel.
/// @param phe holds one p x f matrix per gnome; the pixel starts at phe[i * (p * f) + j * f]
/// @param P is the population, gnome i occupying P[i * f] .. P[i * f + f - 1]
/// @param p is the number of pixels per gnome matrix
/// @param f is the number of features per gnome
/// @param i is the gnome index
/// @param j is the pixel index
void display_phe(float* phe, unsigned int* P, size_t p,size_t f, size_t i, size_t j){
	const unsigned int* gnome = P + i * f;							//feature indices of gnome i
	const float* pixel = phe + i * (p * f) + j * f;					//values of pixel j for gnome i
	std::cout<<"phe["<<i<<"]["<<j<<"]"<<std::endl;
	for(unsigned int n = 0; n < f; n++){
		std::cout<<gnome[n];
		std::cout<<" "<<pixel[n]<<std::endl;
	}
}
  112 +
  113 +
/// Debug helper: print the f feature indices of gnome gIdx on one line, each preceded
/// by a space and without a trailing newline.
/// @param P is the population, gnome gIdx occupying P[gIdx * f] .. P[gIdx * f + f - 1]
/// @param f is the number of features per gnome
/// @param gIdx is the index of the gnome to show
void display_gnome(unsigned int* P,size_t f,size_t gIdx){
	const unsigned int* gnome = P + gIdx * f;						//first feature index of the gnome
	size_t remaining = f;
	while(remaining > 0){
		std::cout<<" "<<*gnome;
		++gnome;
		--remaining;
	}
}
  119 +
... ...
src/ga_gpu.cu 0 → 100644
  1 +++ a/src/ga_gpu.cu
  1 +#ifndef GA_GPU_CU
  2 +#define GA_GPU_CU
  3 +
  4 +//#include <cuda.h>
  5 +//#include "cuda_runtime.h"
  6 +//#include <cuda_runtime_api.h>
  7 +//#include "device_launch_parameters.h"
  8 +#include <stim/cuda/cudatools/error.h>
  9 +
  10 +#include "timer.h"
  11 +//#include <stdio.h>
  12 +//#include <stdlib.h>
  13 +#include <iostream>
  14 +#include <fstream>
  15 +
  16 +extern Timer timer;
  17 +
  18 +
/// Accumulate the between-class scatter contribution of every class into gpuSb.
/// Each thread handles one element of one gnome's f x f matrix: x and y each select a
/// position in the gnome's feature list, z selects the gnome. For every class c the thread adds
///   (CM[c][n1] - M[n1]) * (CM[c][n2] - M[n2]) * gpu_nPxInCls[c]
/// where n1 and n2 are the feature indices stored in the gnome at positions x and y.
/// The kernel only adds to gpuSb; it does not initialize it.
/// @param gpuSb is the output, p matrices of f x f floats
/// @param gpuP is the population, p gnomes of f feature indices each
/// @param gpuM is the per-feature mean, indexed by feature index
/// @param gpuCM holds the class means, class c starting at gpuCM[c * ub]
/// @param ub is the row stride of gpuCM
/// @param f is the number of features per gnome
/// @param p is the number of gnomes
/// @param nC is the number of classes
/// @param gpu_nPxInCls holds the number of pixels of each class
__global__ void kernel_computeSb(float* gpuSb, unsigned int* gpuP, float* gpuM, float* gpuCM, size_t ub, size_t f, size_t p, size_t nC, unsigned int* gpu_nPxInCls){

	const size_t col = blockIdx.x * blockDim.x + threadIdx.x;		//position in the gnome's feature list (selects n1)
	const size_t row = blockIdx.y * blockDim.y + threadIdx.y;		//position in the gnome's feature list (selects n2)
	const size_t gnome = blockIdx.z * blockDim.z + threadIdx.z;		//gnome handled by this thread

	if(gnome >= p || col >= f || row >= f) return;					//threads outside the data do nothing

	const size_t n1 = gpuP[gnome * f + col];						//actual feature index in the original feature matrix
	const size_t n2 = gpuP[gnome * f + row];						//actual feature index in the original feature matrix
	const size_t cell = gnome * f * f + row * f + col;				//output element owned by this thread

	for(size_t c = 0; c < nC; c++){
		const size_t classIndx = c * ub;							//start of class c in the class mean matrix
		float term = ((gpuCM[classIndx + n1] - gpuM[n1]) * (gpuCM[classIndx + n2] - gpuM[n2])) * (float)gpu_nPxInCls[c];
		gpuSb[cell] += term;
	}
}
  44 +
  45 +
/// Accumulate the within-class scatter of every class into gpuSw (p matrices of f x f).
/// Each thread handles one element of one gnome's f x f matrix: x and y each select a
/// position in the gnome's feature list, z selects the gnome. For every class c the thread
/// sums, over all pixels k labelled c + 1,
///   (F[k][n1] - CM[c][n1]) * (F[k][n2] - CM[c][n2])
/// and adds the sum to its output element. The kernel only adds to gpuSw; it does not
/// initialize it.
/// @param gpuSw is the output, p matrices of f x f floats
/// @param gpuP is the population, p gnomes of f feature indices each
/// @param gpuCM holds the class means, class c starting at gpuCM[c * ub]
/// @param gpuF is the feature matrix, pixel k starting at gpuF[k * ub]
/// @param gpuT holds the class label of every pixel (labels are compared with c + 1)
/// @param ub is the row stride of gpuF and gpuCM
/// @param f is the number of features per gnome
/// @param p is the number of gnomes
/// @param nC is the number of classes
/// @param tP is the total number of pixels
__global__ void kernel_computeSw(float* gpuSw, unsigned int* gpuP, float* gpuCM, float* gpuF, unsigned int* gpuT, size_t ub, size_t f, size_t p, size_t nC, size_t tP){
	const size_t col = blockIdx.x * blockDim.x + threadIdx.x;		//position in the gnome's feature list (selects n1)
	const size_t row = blockIdx.y * blockDim.y + threadIdx.y;		//position in the gnome's feature list (selects n2)
	const size_t gnome = blockIdx.z * blockDim.z + threadIdx.z;		//gnome handled by this thread

	if(gnome >= p || col >= f || row >= f) return;					//threads outside the data do nothing

	const size_t n1 = gpuP[gnome * f + col];						//actual feature index in the original feature matrix
	const size_t n2 = gpuP[gnome * f + row];						//actual feature index in the original feature matrix
	const size_t cell = gnome * f * f + row * f + col;				//output element owned by this thread

	for(size_t c = 0; c < nC; c++){
		const size_t label = c + 1;									//labels in gpuT are 1-based
		const size_t meanBase = c * ub;								//start of class c in the class mean matrix
		float classSum = 0;
		for(size_t k = 0; k < tP; k++){
			if(gpuT[k] != label) continue;							//skip pixels of other classes
			classSum += ((gpuF[k * ub + n1] - gpuCM[meanBase + n1]) * (gpuF[k * ub + n2] - gpuCM[meanBase + n2]));
		}
		gpuSw[cell] += classSum;
	}
}
  68 +
  69 +
  70 +
  71 +
  72 + //=============================gpu intialization=============================================
  73 + /// Initialize all GPU pointers used in the GA-GPU algorithm
  74 + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
  75 + /// @param p is the population size
  76 + /// @param f is the number of desired features
  77 + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
  78 + /// @param cpuM is a pointer to the class mean on the CPU
  79 + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
  80 + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class
  81 + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
  82 + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
  83 + /// @param gpuF is the destination for the GPU feature matrix
  84 + /// @param cpuF is the complete feature matrix on the CPU
  85 +
  86 + void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
  87 + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
  88 + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
  89 + float** gpuSb, float** gpuSw,
  90 + float** gpuF, float* cpuF,
  91 + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls){
  92 +
  93 + HANDLE_ERROR(cudaMalloc(gpuP, p * f * sizeof(unsigned int))); //allocate space for the population on the GPU
  94 +
  95 + HANDLE_ERROR(cudaMalloc(gpuCM, nC * ub * sizeof(float))); //allocate space for the class mean and copy it to the GPU
  96 + HANDLE_ERROR(cudaMemcpy(*gpuCM, cpuCM, nC * ub * sizeof(float), cudaMemcpyHostToDevice));
  97 +
  98 +
  99 + HANDLE_ERROR(cudaMalloc(gpuM, ub * sizeof(float))); //allocate space for the mean of the feature matrix
  100 + HANDLE_ERROR(cudaMemcpy(*gpuM, cpuM, ub * sizeof(float), cudaMemcpyHostToDevice));
  101 +
  102 + HANDLE_ERROR(cudaMalloc(gpu_nPxInCls, nC * sizeof(unsigned int))); //number of pixels in each class
  103 + HANDLE_ERROR(cudaMemcpy(*gpu_nPxInCls, cpu_nPxInCls, nC * sizeof(unsigned int), cudaMemcpyHostToDevice));
  104 +
  105 +
  106 + HANDLE_ERROR(cudaMalloc(gpuSb, p * f * f * sizeof(float))); //allocate memory for sb which is calculated for eery class separately and added together in different kernel
  107 + HANDLE_ERROR(cudaMalloc(gpuSw, p * f * f * sizeof(float)));
  108 +
  109 + HANDLE_ERROR(cudaMalloc(gpuF, tP * ub * sizeof(float)));
  110 + HANDLE_ERROR(cudaMemcpy(*gpuF, cpuF, tP * ub * sizeof(float), cudaMemcpyHostToDevice));
  111 +
  112 + HANDLE_ERROR(cudaMalloc(gpuT, tP * sizeof(unsigned int)));
  113 + HANDLE_ERROR(cudaMemcpy(*gpuT, cpuT, tP* sizeof(unsigned int), cudaMemcpyHostToDevice));
  114 +
  115 + }
  116 +
  117 + //computation on GPU
  118 + /// Initialize all GPU pointers used in the GA-GPU algorithm
  119 + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population
  120 + /// @param p is the population size
  121 + /// @param f is the number of desired features
  122 + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices
  123 + /// @param cpuSb is the between-class scatter matrix on the GPU (this function will copy the GPU result there)
  124 + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices
  125 + /// @param cpuSw is the within-class scatter matrix on the GPU (this function will copy the GPU result there)
  126 +
  127 + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean
  128 + /// @param cpuM is a pointer to the class mean on the CPU
  129 + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class
  130 + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class
  131 +
  132 + /// @param gpuF is the destination for the GPU feature matrix
  133 + /// @param cpuF is the complete feature matrix on the CPU
  134 + void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
  135 + float* gpuSb, float* cpuSb,
  136 + float* gpuSw, float* cpuSw,
  137 + float* gpuF, unsigned int* gpuT,float* gpuM, float* gpuCM,
  138 + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile){
  139 +
  140 + timer.start();
  141 + HANDLE_ERROR(cudaMemcpy(gpuP, cpuP, p * f * sizeof(unsigned int), cudaMemcpyHostToDevice));
  142 + HANDLE_ERROR(cudaMemset(gpuSb, 0, p * f * f * sizeof(float)));
  143 +
  144 + //grid configuration of GPU
  145 + size_t threads = (size_t)sqrt(props.maxThreadsPerBlock);
  146 + if(threads > f) threads = f;
  147 + size_t numberofblocksfor_f = (size_t)ceil((float)f/ threads);
  148 + dim3 blockdim((int)threads, (int)threads, 1);
  149 + dim3 griddim((int)numberofblocksfor_f, (int)numberofblocksfor_f, (int)p); //X dimension blocks will cover all gnomes of the population and each block will have as many gnomes as it can feet
  150 + //sharedbytes calculation
  151 + size_t sharedBytes = p * f * sizeof(unsigned int); //copy population to shared memory
  152 + if(props.sharedMemPerBlock < sharedBytes) sharedBytes = props.sharedMemPerBlock;
  153 +
  154 + //launch kernel to compute sb matrix
  155 + kernel_computeSb<<<griddim, blockdim, sharedBytes>>>(gpuSb, gpuP, gpuM, gpuCM, ub, f, p, nC, gpu_nPxInCls);
  156 + cudaDeviceSynchronize();
  157 +
  158 + HANDLE_ERROR(cudaMemcpy(cpuSb, gpuSb, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); //copy between class scatter from gpu to cpu
  159 + const auto elapsedg1 = timer.time_elapsed();
  160 + if(gen > gnrtn -2){
  161 + std::cout << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
  162 + profilefile << "Sb gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg1).count() << "us" << std::endl;
  163 + }
  164 +
  165 + timer.start();
  166 + //Compute within class scatter
  167 + HANDLE_ERROR(cudaMemset(gpuSw, 0, p * f * f * sizeof(float)));
  168 +
  169 + //launch kernel to compute sb matrix
  170 + kernel_computeSw<<<griddim, blockdim>>>(gpuSw, gpuP, gpuCM, gpuF, gpuT, ub, f, p, nC, tP);
  171 + cudaDeviceSynchronize();
  172 + //copy between class scatter from gpu to cpu
  173 + HANDLE_ERROR(cudaMemcpy(cpuSw, gpuSw, p * f * f * sizeof(float), cudaMemcpyDeviceToHost));
  174 + const auto elapsedg2 = timer.time_elapsed();
  175 + if(gen > gnrtn - 2){
  176 + std::cout << "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
  177 + profilefile<< "Sw gpu time "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsedg2).count() << "us" << std::endl;
  178 + }
  179 +
  180 + }
  181 +
  182 + //free all gpu pointers
  183 + void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT){
  184 +
  185 + HANDLE_ERROR(cudaFree(gpuP));
  186 + HANDLE_ERROR(cudaFree(gpuCM));
  187 + HANDLE_ERROR(cudaFree(gpuM));
  188 + HANDLE_ERROR(cudaFree(gpu_nPxInCls));
  189 + HANDLE_ERROR(cudaFree(gpuSb));
  190 + HANDLE_ERROR(cudaFree(gpuSw));
  191 + HANDLE_ERROR(cudaFree(gpuF));
  192 + HANDLE_ERROR(cudaFree(gpuT));
  193 + }
  194 +
  195 +#endif
  196 +
... ...
src/ga_gpu.h 0 → 100644
  1 +++ a/src/ga_gpu.h
  1 +#ifndef GA_GPU_H
  2 +#define GA_GPU_H
  3 +
#include <algorithm>
#include <chrono>
#include <complex>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <thread>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <cv.h>
  11 +
  12 +#include "timer.h"
  13 +
  14 +#include "basic_functions.h"
  15 +//LAPACKE support for Visual Studio
  16 +
  17 +#ifndef LAPACK_COMPLEX_CUSTOM
  18 +#define LAPACK_COMPLEX_CUSTOM
  19 +#define lapack_complex_float std::complex<float>
  20 +#define lapack_complex_double std::complex<double>
  21 +#include "lapacke.h"
  22 +#endif
  23 +
  24 +
  25 +#define LAPACK_ROW_MAJOR 101
  26 +#define LAPACK_COL_MAJOR 102
  27 +
  28 +//CUDA functions
  29 +void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation
  30 + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub,
  31 + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls,
  32 + float** gpuSb, float** gpuSw,
  33 + float** gpuF, float* cpuF,
  34 + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls);
  35 +void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f,
  36 + float* gpuSb, float* cpuSb,
  37 + float* gpuSw, float* cpuSw,
  38 + float* gpuF, unsigned int* T, float* gpuM, float* gpuCM,
  39 + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile);
  40 +void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT);
  41 +
  42 +struct _fcomplex { float re, im; };
  43 +typedef struct _fcomplex fcomplex;
  44 +
  45 +Timer timer;
  46 +
  47 +class ga_gpu {
  48 +
  49 +public:
  50 + float* F; //pointer to the raw data in host memory
  51 + unsigned int* T; //pointer to the class labels in host memory
  52 + size_t gnrtn; //total number of generations
  53 + size_t p; //population size
  54 + size_t f; // number of features to be selected
  55 +
  56 + unsigned int* P; //pointer to population of current generation genotype matrix (p x f)
  57 + float* S; //pointer to score(fitness value) of each gnome from current population matric P
  58 + unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population
  59 + unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F)
  60 + unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0)
  61 + float uniformRate;
  62 + float mutationRate;
  63 + size_t tournamentSize; //number of potential gnomes to select parent for crossover
  64 + bool elitism; //if true then passes best gnome to next generation
  65 +
  66 + //declare gpu pointers
  67 + float* gpuF; //Feature matrix
  68 + unsigned int* gpuT; //target responses of entire feature matrix
  69 + unsigned int* gpuP; //population matrix
  70 + unsigned int* gpu_nPxInCls;
  71 + float* gpuCM; //class mean of entire feature matrix
  72 + float* gpuM; //total mean of entire feature matrix
  73 + float* gpuSb; //between class scatter for all individuals of current population
  74 + float* gpuSw; //within class scatter for all individuals of current population
  75 +
  76 + //constructor
  77 + ga_gpu() {}
  78 +
  79 + //==============================generate initial population
  80 +
  81 + void initialize_population(std::vector<unsigned int> i_guess, bool debug) {
  82 + if (debug) {
  83 + std::cout << std::endl;
  84 + std::cout << "initial populatyion is: " << std::endl;
  85 + }
  86 +
  87 + lb = 0;
  88 + P = (unsigned int*)calloc(p * f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f
  89 + S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P
  90 +
  91 + srand(1);
  92 + //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess
  93 + std::memcpy(P, i_guess.data(), f * sizeof(unsigned int));
  94 +
  95 + //generate random initial population
  96 + for (size_t i1 = 1; i1 < p; i1++) {
  97 + for (size_t i2 = 0; i2 < f; i2++) {
  98 + P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B)
  99 + if (debug) std::cout << P[i1 * f + i2] << "\t";
  100 + }
  101 + if (debug) std::cout << std::endl;
  102 + }
  103 + }
  104 +
  105 + //===================generation of new population==========================================
  106 +
  107 + size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) {
  108 +
  109 + //gget index of best gnome in the current population
  110 + size_t bestG_Indx = gIdxbestGnome();
  111 + //-------------(reproduction)-------
  112 + if (elitism) {
  113 + saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation
  114 + }
  115 + // ------------Crossover population---------------
  116 + int elitismOffset;
  117 + if (elitism) {
  118 + elitismOffset = 1;
  119 + }
  120 + else {
  121 + elitismOffset = 0;
  122 + }
  123 +
  124 + //Do crossover for rest of population size
  125 + for (int i = elitismOffset; i <p; i++) {
  126 + // std::cout<<"crossover of gnome "<<i<<std::endl;
  127 + std::vector<unsigned int>gnome1;
  128 + gnome1.reserve(f);
  129 + gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
  130 + // displaygnome(gnome1);
  131 + std::vector<unsigned int>gnome2;
  132 + gnome2.reserve(f);
  133 + gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes
  134 + // displaygnome(gnome2);
  135 + std::vector<unsigned int>gnome;
  136 + gnome.reserve(f);
  137 + gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome
  138 + // displaygnome(gnome);
  139 + saveGnome(i, gnome, newPop); //save crosseover result to new population
  140 + }
  141 +
  142 + //--------------Mutate population------------
  143 + // introduce some mutation in new population
  144 + for (int i = elitismOffset; i <p; i++) {
  145 + //std::cout<<"mutation of gnome"<<std::endl;
  146 + std::vector<unsigned int>gnome;
  147 + gnome.reserve(f);
  148 +
  149 + for (size_t n = 0; n < f; n++)
  150 + gnome.push_back(newPop[i*f + n]);
  151 + //std::cout<<"\n starting address "<<(&newPop[0] + i*f)<<"\t end address is "<<(&newPop[0] + i*f + f-1) <<std::endl;
  152 + //std::copy((&newPop[0] + i*f), (&newPop[0] + i*f +f-1), gnome.begin());
  153 + // displaygnome(gnome);
  154 + mutate(gnome);
  155 + // displaygnome(gnome);
  156 + saveGnome(i, gnome, newPop); //save new gnome to new population at position i
  157 + }
  158 + return bestG_Indx;
  159 + }
  160 +
  161 + //============================== functions for population evolution ===========================================================================
  162 + std::vector<unsigned int> tournamentSelection(size_t tSize) {
  163 + // Create a tournament population
  164 + unsigned int* tournamentP = (unsigned int*)malloc(tSize * f * sizeof(unsigned int));
  165 + std::vector<float>tournamentS;
  166 +
  167 + // For each place in the tournament get a random individual
  168 + for (size_t i = 0; i < tSize; i++) {
  169 + size_t rndmIdx = rand() % p + lb;
  170 + tournamentS.push_back(S[rndmIdx]);
  171 + //for (size_t n = 0; n <f; n++)
  172 + //tournamentP[i * f + n] = (getGnome(rndmIdx)).at(n);
  173 + std::vector<unsigned int> temp_g(getGnome(rndmIdx));
  174 + std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f);
  175 + }
  176 + // Get the fittest
  177 + std::vector<unsigned int>fittestgnome;
  178 + fittestgnome.reserve(f);
  179 +
  180 + //select index of best gnome from fitness score
  181 + size_t bestSIdx = 0;
  182 + for (size_t i = 0; i < tSize; i++) {
  183 + if (tournamentS[i] < tournamentS[bestSIdx])
  184 + bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  185 + }
  186 +
  187 + for (size_t n = 0; n < f; n++)
  188 + fittestgnome.push_back(tournamentP[bestSIdx * f + n]);
  189 + return fittestgnome;
  190 + } //end of tournament selection
  191 +
  192 +
  193 + std::vector<unsigned int> crossover(std::vector<unsigned int> gnome1, std::vector<unsigned int> gnome2, float* M) {
  194 + std::vector<unsigned int> gnome;
  195 + for (size_t i = 0; i < f; i++) {
  196 + // Crossover
  197 + float r = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
  198 + if (r <= uniformRate) {
  199 + gnome.push_back(gnome1.at(i));
  200 + }
  201 + else {
  202 + gnome.push_back(gnome2.at(i));
  203 + }
  204 + }
  205 +
  206 + //check new gnome for all zero bands and duplicated values
  207 + std::vector<unsigned int> gnomeunique;
  208 + int flag = 0;
  209 + std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
  210 + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique));
  211 + /* if(gnomeunique.size()< gnome.size()){
  212 + flag = 1;
  213 + std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
  214 + }*/
  215 + unsigned int featureband, featureband1, featureband2;
  216 + if (gnomeunique.size() < f) {
  217 + for (size_t k = gnomeunique.size(); k < f; k++) {
  218 + featureband = rand() % ub + lb;
  219 + for (size_t i = 0; i < f; i++) {
  220 + featureband1 = gnome1.at(i);
  221 + featureband2 = gnome2.at(i);
  222 + for (size_t j = 0; j < gnomeunique.size(); j++) {
  223 + if (gnomeunique.at(j) != featureband1) {
  224 + featureband = featureband1;
  225 + }
  226 + else if (gnomeunique.at(j) != featureband2) {
  227 + featureband = featureband2;
  228 + }
  229 + else if (gnomeunique.at(j) == featureband) {
  230 + featureband = rand() % ub + lb;
  231 + while (M[featureband] == 0) {
  232 + featureband = rand() % ub + lb;
  233 + }
  234 + }
  235 + }
  236 + }
  237 + gnomeunique.push_back(featureband);
  238 + }
  239 + }
  240 + //if(flag ==1){
  241 + // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
  242 + // for(int k = 0; k < gnome.size(); k++)
  243 + // std::cout<<gnome[k]<<"\t";
  244 + // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
  245 + // for(int k = 0; k < gnomeunique.size(); k++)
  246 + // std::cout<<gnomeunique[k]<<"\t";
  247 + //}
  248 +
  249 + return gnomeunique;
  250 + }
  251 +
  252 + void mutate(std::vector<unsigned int> gnome) {
  253 + for (size_t i = 0; i < f; i++) {
  254 + float LO = (float)0.01;
  255 + float HI = 1;
  256 + float r3 = LO + static_cast <float> (rand()) / (static_cast <float> (RAND_MAX / (HI - LO)));
  257 + //if random value is less than mutationRate then mutate this gnome
  258 + if (r3 <= mutationRate) {
  259 + gnome.at(i) = (rand() % ub + lb);
  260 + gnome.push_back(rand() % ub + lb);
  261 + }
  262 + }
  263 + }
  264 +
  265 + ///returns gnome of given index
  266 + std::vector<unsigned int> getGnome(size_t idx) {
  267 + std::vector<unsigned int> gnome;
  268 + gnome.reserve(f);
  269 + //pulling gnome idx from population P
  270 + for (size_t n = 0; n < f; n++)
  271 + gnome.push_back(P[idx * f + n]);
  272 + //memcpy(&gnome[0], P+idx*f, f*sizeof(size_t));
  273 + return gnome;
  274 + }
  275 +
  276 + //save gnome of index gIdx from previous population at position i in the new population
  277 + void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) {
  278 + for (size_t n = 0; n < f; n++)
  279 + newPop[i * f + n] = P[gIdx * f + n];
  280 + }
  281 +
  282 + void saveGnome(size_t idx, std::vector<unsigned int>gnome, unsigned int* newPop) {
  283 + std::copy(gnome.begin(), gnome.end(), newPop + idx*f);
  284 + }
  285 +
  286 + size_t gIdxbestGnome() {
  287 + //std::cout<<"best gnome indes is: "<<sortSIndx()[0];
  288 + return sortSIndx()[0];
  289 + }
  290 +
  291 + void displaygnome(std::vector<unsigned int> gnome) {
  292 + std::cout << "\t gnome: ";
  293 + for (int i = 0; i<gnome.size(); ++i)
  294 + std::cout << gnome[i] << ' ';
  295 + std::cout << std::endl;
  296 + }
  297 +
  298 + //---------------------post processing of score-------------------------------------
  299 + void Snorm() { //normalize gnome scores
  300 + double s;
  301 + for (size_t i = 0; i < p; i++) {
  302 + s += S[i]; //sum of all gnome score in population
  303 + }
  304 + //std::cout<<"mean Score is: "<<(double) s/p;
  305 + for (size_t i = 0; i <p; i++)
  306 + S[i] = S[i] / s;
  307 + }
  308 +
  309 + size_t* sortSIndx() { //sort gnome index according to gnome scores
  310 + //sort indices of score in ascending order (fitness value)
  311 + size_t *idx = (size_t*)malloc(p * sizeof(size_t)); //array to hold sorted gnome index
  312 + for (size_t i = 0; i < p; i++) { //initialize index array from 1 to p(population size) in an ascending order
  313 + idx[i] = i;
  314 + }
  315 +
  316 + for (size_t i = 0; i<p; i++) { //sort gnome indices according to score values using bubble sort
  317 + for (size_t j = i + 1; j<p; j++) {
  318 + if (S[idx[i]] > S[idx[j]]) {
  319 + std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  320 + }
  321 + }
  322 + }
  323 +
  324 + //display best gnome
  325 + //std::cout << "best fitness value: " << S[idx[0]] << std::endl;
  326 + /*if (S[idx[0]] < 0) {
  327 + std::cout << "best gnome is " << std::endl;
  328 + for (size_t i = 0; i < f; i++)
  329 + std::cout << P[f * idx[0] + i] << ", ";
  330 + std::cout << std::endl;
  331 + }*/
  332 +
  333 + return idx; //use as sortSIdx in selection
  334 + }
  335 +
  336 +
  337 + //size_t* sortIndx(float* input, size_t size) {
  338 + // //sort indices of score in ascending order (fitness value)
  339 + // size_t *idx;
  340 + // idx = (size_t*)malloc(size * sizeof(size_t));
  341 + // for (size_t i = 0; i < size; i++)
  342 + // idx[i] = i;
  343 +
  344 + // for (size_t i = 0; i<size; i++) {
  345 + // for (size_t j = i + 1; j<size; j++) {
  346 + // if (input[idx[i]] < input[idx[j]]) {
  347 + // std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error
  348 + // }
  349 + // }
  350 + // }
  351 + // return idx; //use as sortSIdx in selection
  352 +
  353 + //}
  354 +
  355 + void generateNewP(unsigned int* newPop) {
  356 + //std::memcpy(P, 0 , p * f *sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
  357 + std::memcpy(P, newPop, p * f * sizeof(unsigned int)); //copy sb of gnome 'g' into bufferarray tempg_s
  358 + }
  359 +
  360 + //============================== functions for fitness function ===========================================================================
  361 + //compute total mean M (1 X B) of all features (tP X B)
  362 + void ttlMean(float* M, size_t tP, size_t B) {
  363 + //std::cout<<"total number of pixels are "<<tP<<std::endl;
  364 + for (int k = 0; k < tP; k++) { //total number of pixel in feature matrix
  365 + for (size_t n = 0; n < B; n++) { // index of feature in ith gnome
  366 + M[n] += F[k * B + n];
  367 + }
  368 + }
  369 + for (size_t n = 0; n < B; n++) //take an avarage of above summation
  370 + M[n] = M[n] / (float)tP;
  371 + }
  372 +
  373 + void dispalymean(float* M) { //display mean
  374 + std::cout << std::endl;
  375 + std::cout << "Total mean of gnome 1 features are is " << std::endl;
  376 +
  377 + for (size_t i = 0; i < 1; i++) {
  378 + for (size_t j = 0; j < f; j++) {
  379 + size_t index = P[i*f + j];
  380 + std::cout << "feature index " << index << "\t total mean" << M[index] << std::endl;
  381 + }
  382 + }
  383 + std::cout << std::endl;
  384 + }
  385 +
  386 + //Compute class means cM (nC x B): the per-class mean of every feature in the feature matrix F (tP x B)
  387 + void classMean(float* cM, size_t tP, size_t nC, size_t B, std::vector<unsigned int> nPxInCls) {
  388 + for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses
  389 + float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c'
  390 + for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix
  391 + if (T[k] == c + 1) { //class numbers start from 1 not 0
  392 + for (size_t n = 0; n < B; n++) { //total number of features in a gnome
  393 + tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome
  394 + }
  395 + }
  396 + }
  397 + for (size_t n = 0; n < B; n++)
  398 + cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c'
  399 +
  400 + }
  401 +
  402 + }
  403 +
  404 + //display class mean
  405 + void dispalyClassmean(float* cM, size_t nC) {
  406 + std::cout << std::endl;
  407 + std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl;
  408 + for (size_t i = 0; i < 1; i++) {
  409 + for (size_t c = 0; c < nC; c++) {
  410 + for (size_t j = 0; j < f; j++) {
  411 + size_t index = P[i*f + j];
  412 +
  413 + std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl;
  414 + }
  415 + }
  416 + }
  417 + std::cout << std::endl;
  418 + }
  419 +
  420 + //-----------------------------------------between and within class Scattering computation---------------------------------------------------------------
  421 + //computation on CPU
  422 + void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector<unsigned int> nPxInCls) {
  423 + timer.start();
  424 + computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU
  425 + const auto elapsed = timer.time_elapsed();
  426 + std::cout << "Sb CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << "us" << std::endl;
  427 +
  428 + timer.start();
  429 + computeSw(sw, cM, nC, tP); //compute within class scatter on CPU
  430 + const auto elapsed1 = timer.time_elapsed();
  431 + std::cout << "Sw CPU time " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed1).count() << "us" << std::endl;
  432 + }
  433 +
  434 + //display between class scatter
  435 + void displaySb(float* sb) {
  436 + std::cout << "between scatter is " << std::endl;
  437 + for (size_t g = 0; g<1; g++) {
  438 + std::cout << std::endl;
  439 + for (size_t j = 0; j < f; j++) { //total number of features in a gnome
  440 + for (size_t k = 0; k < f; k++) { //total number of features in a gnome
  441 + std::cout << sb[g * f * f + j * f + k] << " ";
  442 + }
  443 + std::cout << std::endl;
  444 + }
  445 + }
  446 + std::cout << std::endl;
  447 + }
  448 +
  449 + //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f)
  450 + void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector<unsigned int> nPxInCls) {
  451 + float tempsbval;
  452 + size_t n1;
  453 + size_t n2;
  454 + size_t classIndx; //class index in class mean matrix
  455 + /*std::cout <<"population of computation of cpusb "<< std::endl;
  456 + for (size_t i2 = 0; i2 < f; i2++) {
  457 + std::cout << P[i2] << "\t";
  458 + }*/
  459 +
  460 + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
  461 + for (size_t c = 0; c < nC; c++) {
  462 + for (size_t i = 0; i < f; i++) {
  463 + for (size_t j = 0; j < f; j++) {
  464 + tempsbval = 0;
  465 + classIndx = c * ub;
  466 + n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix
  467 + n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix
  468 + // std::cout << "i: " << i << " j: " <<j<< " n1: " << n1 << " n2:" << n2 << std::endl;
  469 + tempsbval = ((cM[classIndx + n1] - M[n1]) *(cM[classIndx + n2] - M[n2]));
  470 + sb[gnomeIndx * f * f + i * f + j] += tempsbval * (float)nPxInCls[c]; // compute tempsb[j][k] element of class 'c' of gnome 'i'
  471 + }
  472 + }
  473 + }
  474 + }
  475 +
  476 + }
  477 +
  478 + //Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f)
  479 + void computeSw(float* sw, float* cM, size_t nC, size_t tP) {
  480 + float tempswval;
  481 + size_t n1;
  482 + size_t n2;
  483 + size_t cMclass; //class index in class mean matrix
  484 + size_t Pg;
  485 + size_t swg;
  486 + size_t pheg;
  487 + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) {
  488 + Pg = gnomeIndx * f;
  489 + swg = gnomeIndx * f * f;
  490 + pheg = gnomeIndx * tP * f;;
  491 + for (size_t c = 0; c < nC; c++) {
  492 + cMclass = c * ub;
  493 +
  494 + for (size_t k = 0; k < tP; k++) {
  495 + if (T[k] == (c + 1)) {
  496 + for (size_t i = 0; i < f; i++) {
  497 + for (size_t j = 0; j < f; j++) {
  498 + n1 = P[Pg + i]; //actual feature index in original feature matrix
  499 + n2 = P[Pg + j]; //actual feature index in original feature matrix
  500 +
  501 + tempswval = 0;
  502 + tempswval = ((F[k * ub + n1] - cM[cMclass + n1]) * (F[k * ub + n2] - cM[cMclass + n2]));
  503 + //tempswval = ((phe[gnomeIndx * tP * f + k * f + i] - cM[c * ub + P[gnomeIndx * f + i]]) * (phe[gnomeIndx * tP *f + k * f + j] - cM[c * ub + P[gnomeIndx * f + j]]));
  504 + sw[gnomeIndx * f * f + i * f + j] += tempswval;
  505 + }
  506 + }
  507 + }
  508 + }
  509 + }
  510 +
  511 + }
  512 + }
  513 + //checking bands with all zeros and replacing duplicated bands in gnome but this function is only for initial population
  514 + //void zerobandcheck(float* M, bool initial) {
  515 + // for (size_t g = 0; g < p; g++) { // for each gnome
  516 + // for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
  517 + // while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
  518 + // P[g * f + i] = rand() % ub + lb;
  519 + // }
  520 + // }
  521 + // //checking for duplicats in a gnome
  522 + // std::vector<unsigned int> gnome = getGnome(g);
  523 + // std::vector<unsigned int> gnomeunique;
  524 + // int flag = 0; //flag will be set if gnome has duplicated band (feature) index
  525 + // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
  526 + // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies
  527 + // if (gnomeunique.size()< gnome.size()) {
  528 + // flag = 1; //set flag for those if there are duplicated indices
  529 + // //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
  530 + // }
  531 +
  532 + // //adding extra random feature indices to unique copy of gnome to achive gnome size = f
  533 + // if (gnomeunique.size() < f) {
  534 + // for (size_t k = gnomeunique.size(); k < f; k++) {
  535 + // unsigned int rnumber = rand() % ub + lb;
  536 + // //check if this randomaly generated number is already present in that gnome or not
  537 + // for (size_t j = 0; j < gnomeunique.size(); j++) {
  538 + // if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
  539 + // rnumber = rand() % ub + lb;
  540 + // j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
  541 + // }
  542 + // }
  543 + // gnomeunique.push_back(rnumber); //add feature index to gnomeunique
  544 + // }
  545 + // }
  546 + // std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f);
  547 + // }
  548 + //}
  549 +
  550 + //checking bands with all zeros and replacing duplicated bands in gnome
  551 + void zerobandcheck(float* M, bool initialPop) {
  552 + size_t startgnome;
  553 + if (initialPop) {
  554 + startgnome = 0; //for initial population check all gnomes
  555 + }
  556 + else {
  557 + startgnome = 1; //for next generations start gnome check after elite children offset
  558 + }
  559 + for (size_t g = startgnome; g < p; g++) { // for each gnome except
  560 +
  561 + for (size_t i = 0; i < f; i++) { //check each band (feature) index in that gnome
  562 + while (M[P[g * f + i]] == 0) { //if mean of band is zero then replace band index in population
  563 + P[g * f + i] = rand() % ub + lb;
  564 + }
  565 + }
  566 + //checking for duplicats in a gnome
  567 + std::vector<unsigned int> gnome = getGnome(g); //get current gnome g from population matrix P
  568 + std::vector<unsigned int> gnomeunique; //array to store only unique band indicies in a genome
  569 + int flag = 0; //flag will be set if gnome has duplicated band (feature) index
  570 + std::sort(gnome.begin(), gnome.end()); //sort current gnome
  571 + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome
  572 + if (gnomeunique.size()< gnome.size()) {
  573 + flag = 1; //set flag for those if there are duplicated indices
  574 + //std::cout<<"gnome:["<<g<<"] "<<"\t duplications are "<< (gnome.size() - gnomeunique.size())<<std::endl;
  575 + }
  576 +
  577 + //adding extra random feature indices to unique copy of gnome to achive gnome size = f
  578 + if (gnomeunique.size() < f) {
  579 + for (size_t k = gnomeunique.size(); k < f; k++) {
  580 + unsigned int rnumber = rand() % ub + lb;
  581 + //check if this randomaly generated number is already present in that gnome or not
  582 + for (size_t j = 0; j < gnomeunique.size(); j++) {
  583 + if (gnomeunique.at(j) == rnumber) { //if new index is duplicated copy of any of previous gnome element replace it with another random number
  584 + rnumber = rand() % ub + lb; //generate random number between upper bound and lower bound (ub. lb)
  585 + j = 0; //set j = 0 to start checking of duplication of feature index from the first element of gnome
  586 + }
  587 + }
  588 + gnomeunique.push_back(rnumber); //add feature index to gnomeunique
  589 + }
  590 + }
  591 +
  592 + //diplay loop only if gnome has duplicated indices
  593 + //if(flag ==1){
  594 + // std::cout<<"\n original gnome "<<g<<" are "<<std::endl;
  595 + // for(int k = 0; k < gnome.size(); k++)
  596 + // std::cout<<gnome[k]<<"\t";
  597 + // std::cout<<"\n unique results in cpp for gnome "<<g<<" are "<<std::endl;
  598 + // for(int k = 0; k < gnomeunique.size(); k++)
  599 + // std::cout<<gnomeunique[k]<<"\t";
  600 + //}
  601 + std::copy(gnomeunique.begin(), gnomeunique.end(), P + g * f); //copy new gnome without any duplicate band index at current gnome location
  602 + }
  603 + }
  604 +
  605 +
  606 +
  607 + //gpu calling functions
  608 + //gpu initialization (allocating space for all arrays on the GPU)
  609 + void gpuInitializationfrommain(float* cpuM, float* cpuCM, std::vector<unsigned int>cpu_nPxInCls, size_t tP, size_t nC) {
  610 + // call gpuInitialization(......) with all of the necessary parameters
  611 + gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]);
  612 +
  613 + }
  614 +
  615 + //Computation of between class scatter and within class scatter in GPU
  616 + void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) {
  617 + //calling function for SW and Sb computation and passing necessary arrays for computation
  618 + // std::cout<<"gpu function calling"<<std::endl;
  619 + gpucomputeSbSw(gpuP, P, p, f, gpuSb, cpuSb, gpuSw, cpuSw, gpuF, gpuT, gpuM, gpuCM, nC, tP, props, gen, gnrtn, ub, gpu_nPxInCls, profilefile);
  620 +
  621 + //display computed Sb and Sw if debug is set
  622 + if (debug) {
  623 + std::cout << "From GA-GPU class: gpu results of Sb sn Sw" << std::endl;
  624 + displayS(cpuSb, f); //display Sb
  625 + displayS(cpuSw, f); //display Sw
  626 + std::cout << std::endl;
  627 + }
  628 + }
  629 +
  630 + //call function to free gpu pointers
  631 + //free all gpu pointers
  632 + void gpu_Destroy() { //hands every gpu* pointer of this object to gpuDestroy() (defined elsewhere), which presumably frees the device allocations
  633 + gpuDestroy(gpuP, gpuCM, gpuM, gpu_nPxInCls, gpuSb, gpuSw, gpuF, gpuT); //pointers are passed by value and are not reset here - NOTE(review): confirm nothing uses them after this call
  634 + }
  635 +
  636 + //destructor: frees the F, T, P and S buffers with std::free
  637 + ~ga_gpu() {
  638 +
  639 + if (F != NULL) std::free(F); //not sure about this as it is only for 2nd constructor
  640 + if (T != NULL) std::free(T); //same as above
  641 + if (P != NULL) std::free(P); //not sure about this as it is only for 2nd constructor
  642 + if (S != NULL) std::free(S); //same as above
  643 + //if(i_guess!=NULL) std::free(i_guess); //same as above
  644 + //HANDLE_ERROR(cudaDeviceReset());
  645 +
  646 + }
  647 + };
  648 +
  649 +#endif
... ...
src/main.cpp 0 → 100644
  1 +++ a/src/main.cpp
  1 +#include <iostream>
  2 +
  3 +//stim libraries
  4 +#include <stim/envi/envi.h>
  5 +#include <stim/image/image.h>
  6 +#include <stim/ui/progressbar.h>
  7 +#include <stim/parser/filename.h>
  8 +#include <stim/parser/table.h>
  9 +#include <stim/parser/arguments.h>
  10 +//input arguments: global stim::arglist object (NOTE(review): defined between the #include lines, before the standard headers below are pulled in)
  11 +stim::arglist args;
  12 +#include <fstream>
  13 +#include <thread>
  14 +#include <random>
  15 +#include <vector>
  16 +#include <math.h>
  17 +#include <limits>
  18 +
  19 +#define NOMINMAX
  20 +
  21 +
  22 +
  23 +//GA
  24 +#include "ga_gpu.h"
  25 +#include "enviload.h"
  26 +
  27 +
  28 + //envi input file and associated parameters
  29 + stim::envi E; //ENVI binary file object
  30 + unsigned int B; //shortcut for a size of the ENVI image (presumably the number of spectral bands - TODO confirm)
  31 + //mask and class information used for training
  32 + //std::vector< stim::image<unsigned char> > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel#
  33 + std::vector<unsigned int> nP; //nP[m] is the number of pixels in mask m
  34 + size_t nC = 0; //number of classes
  35 + size_t tP = 0; //total number of pixels in all masks: tP = nP[0] + nP[1] + ... + nP[nC-1] (assuming one mask per class)
  36 + float* fea; //NOTE(review): purpose not visible in this part of the file; the name is shadowed by the `fea` parameter of gpuComputeEignS
  37 +
  38 + //ga_gpu class object
  39 + ga_gpu ga; //global GA instance; gpuComputeEignS reads gnomes from it via ga.getGnome(g)
  40 + bool debug; //when true, gpuComputeEignS prints its intermediate matrices to std::cout
  41 + bool binaryClass; //presumably selects a two-class (one-vs-rest) mode - TODO confirm against the code that sets it
  42 + int binClassOne; //presumably the class index treated as "class one" in binary mode - TODO confirm
  43 +
  44 + //struct bundling the per-gnome work arrays so they can be reached from thread functions (original note: the thread call limits the number of arguments to 3)
  45 + typedef struct {
  46 + float* S; //NOTE(review): not used in the visible part of the file - presumably the per-gnome fitness scores; confirm
  47 + float* Sb; //between-class scatter matrices; gpuComputeEignS reads the f*f block starting at Sb[g * f * f] for gnome g
  48 + float* Sw; //within-class scatter matrices; same layout, f*f floats per gnome starting at Sw[g * f * f]
  49 + float* lda; //LDA basis output; gpuComputeEignS writes r*f floats per gnome starting at lda[g * r * f]
  50 + }gnome;
  51 + gnome gnom; //single global instance shared by all calls to gpuComputeEignS
  52 +
  53 +
  54 +void gpuComputeEignS( size_t g, size_t fea){
  55 + //eigen value computation will return r = (nC-1) eigen vectors so new projected data will have dimension of r rather than f
  56 + // std::thread::id this_id = std::this_thread::get_id();
  57 + // std::cout<<"thread id is "<< this_id<<std::endl;
  58 + size_t f = fea;
  59 + //std::thread::id g = std::this_thread::get_id();
  60 + float* LeftEigVectors_a = (float*) malloc(f * f * sizeof(float));
  61 + float* gSw_a = (float*) malloc(f * f * sizeof(float)); //copy of between class scatter
  62 + std::memcpy(gSw_a, &gnom.Sw[g * f * f], f * f *sizeof(float));
  63 + if(debug){
  64 + std::cout<<"From Eigen function: Sb and Sw "<<std::endl;
  65 + displayS(gSw_a, f); //display Sb
  66 + displayS(&gnom.Sb[g * f * f], f); //display Sw
  67 + std::cout<<std::endl;
  68 + }
  69 +
  70 + std::vector<unsigned int> features = ga.getGnome(g);
  71 + std::vector<unsigned int> featuresunique;
  72 + int flag = 0;
  73 + std::sort(features.begin(), features.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7
  74 + std::unique_copy(features.begin(), features.end(), std::back_inserter(featuresunique));
  75 + if(featuresunique.size()< features.size()){
  76 + f = featuresunique.size();
  77 + }
  78 +
  79 + size_t r = nC-1; //LDA projected dimension (limited to number of classes - 1 by rank)
  80 + if(r > f){
  81 + r = f;
  82 + }
  83 +
  84 + int info;
  85 + float* EigenvaluesI_a = (float*)malloc(f * sizeof(float));
  86 + float* Eigenvalues_a = (float*)malloc(f * sizeof(float));
  87 + int *IPIV = (int*) malloc(sizeof(int) * f);
  88 + //computing inverse of matrix Sw
  89 + memset(IPIV, 0, f * sizeof(int));
  90 + LAPACKE_sgetrf(LAPACK_COL_MAJOR, (int)f, (int)f, gSw_a, (int)f, IPIV);
  91 + // DGETRI computes the inverse of a matrix using the LU factorization computed by DGETRF.
  92 + LAPACKE_sgetri(LAPACK_COL_MAJOR, (int)f, gSw_a, (int)f, IPIV);
  93 +
  94 + float* gSbSw_a = (float*)calloc(f * f, sizeof(float));
  95 + //mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f * sizeof(float)], f, f, f,f);
  96 + mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f], f, f, f,f);
  97 + if(debug){
  98 + std::cout<<"From Eigen function: inverse of sw and ratio of sb and sw (Sb/Sw)";
  99 + displayS(gSw_a, f); //display inverse of Sw (1/Sw)
  100 + displayS(gSbSw_a, f); //display ratio of Sb and Sw (Sb/Sw)
  101 + }
  102 +
  103 + //compute left eigenvectors for current gnome from ratio of between class scatter and within class scatter: Sb/Sw
  104 + info = LAPACKE_sgeev(LAPACK_COL_MAJOR, 'V', 'N', (int)f, gSbSw_a, (int)f, Eigenvalues_a, EigenvaluesI_a, LeftEigVectors_a, (int)f, 0, (int)f);
  105 + //sort eignevalue indices in descending order
  106 + size_t* sortedindx = sortIndx(Eigenvalues_a, f);
  107 + //displayS(LeftEigVectors_a, f); //display Eignevectors (Note these are -1 * matlab eigenvectors does not change fitness score results but keep in mind while projecting data on it)
  108 + //sorting left eigenvectors (building forward transformation matrix As)
  109 + for (size_t rowE = 0; rowE < r; rowE++){
  110 + for (size_t colE = 0; colE < f; colE++){
  111 + size_t ind1 = g * r * f + rowE * f + colE;
  112 + //size_t ind1 = rowE * f + colE;
  113 + size_t ind2 = sortedindx[rowE] * f + colE; //eigenvector as row vector
  114 + gnom.lda[ind1] = LeftEigVectors_a[ind2];
  115 + }
  116 + }
  117 +
  118 + if(debug){
  119 + std::cout<<"Eigenvalues are"<<std::endl;
  120 + for(size_t n = 0 ; n < f; n ++){
  121 + std::cout << Eigenvalues_a[n] << ", " ;
  122 + }
  123 + std::cout<< std::endl;
  124 + std::cout<<"From Eigen function: Eignevector"<<std::endl;
  125 +
  126 + std::cout<<"LDA basis is "<<std::endl;
  127 + std::cout << "r is " << r << std::endl;
  128 + for(size_t l = 0 ; l < r; l++){
  129 + for(size_t n = 0 ; n < f; n ++){
  130 + std::cout << gnom.lda[g * l * f + l * f + n] << ", " ;
  131 + }
  132 + std::cout<<std::endl;
  133 + }
  134 +
  135 + }
  136 + //Extract only r eigne vectors as a LDA projection basis
  137 + float* tempgSb = (float*)calloc(r * f, sizeof(float));
  138 + //mtxMul(tempgSb, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sb[g * f * f * sizeof(float)], r, f, f,f);
  139 + //mtxMul(tempgSb, &lda[g * r * f ], gSb, r, f, f,f);
  140 + mtxMul(tempgSb, &gnom.lda[g * r * f], &gnom.Sb[g * f * f], r, f, f,f);
  141 + float* nSb = (float*)calloc(r * r, sizeof(float));
  142 + mtxMultranspose(nSb, tempgSb, &gnom.lda[g * r * f], r, f, r, f);
  143 +
  144 + float* tempgSw = (float*)calloc(r * f, sizeof(float));
  145 + //mtxMul(tempgSw, &gnom.lda[g * r * f * sizeof(float)], &gnom.Sw[g * f * f * sizeof(float)], r, f, f,f);
  146 + mtxMul(tempgSw, &gnom.lda[g * r * f], &gnom.Sw[g * f * f], r, f, f,f);
  147 + float* nSw = (float*)calloc(r * r, sizeof(float));
  148 + mtxMultranspose(nSw, tempgSw, &gnom.lda[g * r * f], r, f, r, f);
  149 + if(debug){
  150 + std::cout<<"From Eigen function: projected Sb sn Sw"<<std::endl;