From 0d84034253937b68843ec545a0050dd6abaccf5c Mon Sep 17 00:00:00 2001 From: David Mayerich Date: Mon, 16 Dec 2019 13:50:34 -0600 Subject: [PATCH] public release commit --- CMakeLists.txt | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FindGLEW.cmake | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FindGLUT.cmake | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FindLAPACKE.cmake | 43 +++++++++++++++++++++++++++++++++++++++++++ FindSTIM.cmake | 22 ++++++++++++++++++++++ src/basic_functions.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/enviload.h | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ga_gpu.cu | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ga_gpu.h | 649 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 556 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/timer.h | 37 +++++++++++++++++++++++++++++++++++++ 11 files changed, 2164 insertions(+), 0 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 FindGLEW.cmake create mode 100644 FindGLUT.cmake create mode 100644 FindLAPACKE.cmake create mode 100644 FindSTIM.cmake create mode 100644 src/basic_functions.h create mode 100644 src/enviload.h create mode 100644 src/ga_gpu.cu create mode 100644 src/ga_gpu.h create mode 100644 src/main.cpp create mode 100644 src/timer.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..966967a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,145 @@ +#Specify the version being used aswell as the language +cmake_minimum_required(VERSION 2.8) + +#Name your project here +project(ga-gpu) + +#set the module directory +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}") + +#default to release mode +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif(NOT CMAKE_BUILD_TYPE) + +#build the executable in the binary directory on MS Visual Studio +if ( MSVC ) + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${OUTPUT_DIRECTORY}") + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${OUTPUT_DIRECTORY}") +endif ( MSVC ) +#MAYBE REMOVE----------------- +#set C++11 flags if using GCC +if( CMAKE_COMPILER_IS_GNUCC ) +# SET( CMAKE_CXX_FLAGS "-std=c++11") + set(CMAKE_CXX_FLAGS "-std=c++11 -D_FORCE_INLINES") +# SET( CUDA_NVCC_FLAGS "-std=c++11") +endif( CMAKE_COMPILER_IS_GNUCC ) + +SET( CUDA_NVCC_FLAGS "--gpu-architecture=compute_50 --gpu-code=sm_50,compute_50") +#----------------------------- + + + +#find packages----------------------------------- +#find OpenCV +find_package(OpenCV REQUIRED) +add_definitions(-DUSING_OPENCV) + +#find the pthreads package +find_package(Threads) + +#find the X11 package +find_package(X11) + +#find the STIM library +find_package(STIM) + +#find CUDA, mostly for LA stuff using cuBLAS +find_package(CUDA REQUIRED) + +#find Boost for Unix-based file lists +if( CMAKE_COMPILER_IS_GNUCC ) + find_package(Boost COMPONENTS filesystem system) + if(Boost_FOUND) + include_directories(${Boost_INCLUDE_DIR}) + else() + message(FATAL_ERROR "HSIproc requires Boost::filesystem and Boost::system when using GCC") + endif() +endif() + +#find FANN +#find_package(FANN REQUIRED) + +#find the GLUT library for visualization +#find_package(OpenGL REQUIRED) +#find_package(GLUT REQUIRED) +#if(WIN32) +# find_package(GLEW REQUIRED) +# include_directories(${GLEW_INCLUDE_DIR}) +#endif(WIN32) + +#find LAPACK and supporting link_libraries +find_package(LAPACKE REQUIRED) + +#include include directories +include_directories(${CUDA_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} + ${LAPACKE_INCLUDE_DIR} + ${STIM_INCLUDE_DIRS} + ${OpenGL_INCLUDE_DIRS} +# ${GLUT_INCLUDE_DIR} + ${FANN_INCLUDE_DIRS} + "${CMAKE_SOURCE_DIR}/src" +) + +#Assign a variable for all of the header files in this project +include_directories("${CMAKE_SOURCE_DIR}/src") +#file(GLOB GACPU_H "${CMAKE_SOURCE_DIR}/src/gacpu/*.h") +file(GLOB GAGPU_H "${CMAKE_SOURCE_DIR}/src/*.h") +#file(GLOB GA_H "${CMAKE_SOURCE_DIR}/src/*.h") + +#Assign source files to the appropriate variables to easily associate them with executables +#file(GLOB GA_CPU_SRC "${CMAKE_SOURCE_DIR}/src/gacpu/*.cpp") +file(GLOB GA_GPU_SRC "${CMAKE_SOURCE_DIR}/src/*.c*") + + +#create an executable file +cuda_add_executable(ga-gpu + ${GAGPU_H} +# ${GA_H} + ${GA_GPU_SRC} +) +target_link_libraries(ga-gpu ${CUDA_LIBRARIES} + ${CUDA_CUBLAS_LIBRARIES} + ${CUDA_CUFFT_LIBRARIES} + ${LAPACKE_LIBRARIES} + ${LAPACK_LIBRARIES} + ${BLAS_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT} + ${X11_LIBRARIES} + ${OpenCV_LIBS} +) + + +#create the PROC executable---------------------------------------------- + +#create an executable file +#add_executable(hsiga +# ${GACPU_H} +# ${GA_H} +# ${GA_CPU_SRC} +#) +#target_link_libraries(hsiga ${LAPACKE_LIBRARIES} +# ${LAPACK_LIBRARIES} +# ${BLAS_LIBRARIES} +# ${CMAKE_THREAD_LIBS_INIT} +# ${X11_LIBRARIES} +# ${OpenCV_LIBS} +#) + + + +#if Boost is found, set an environment variable to use with preprocessor directives +if(Boost_FILESYSTEM_FOUND) +# if(BUILD_GACPU) +# target_link_libraries(hsiga ${Boost_FILESYSTEM_LIBRARIES} +# ${Boost_SYSTEM_LIBRARY} +# ) + #message(${Boost_FILESYSTEM_LIBRARIES}) +# endif(BUILD_GACPU) +# if(BUILD_GAGPU) + target_link_libraries(ga-gpu ${Boost_FILESYSTEM_LIBRARIES} + ${Boost_SYSTEM_LIBRARY} + ) +# endif(BUILD_GAGPU) +endif(Boost_FILESYSTEM_FOUND) diff --git a/FindGLEW.cmake b/FindGLEW.cmake new file mode 100644 index 0000000..0b72457 --- /dev/null +++ b/FindGLEW.cmake @@ -0,0 +1,126 @@ +# Copyright (c) 2012-2016 DreamWorks Animation LLC +# +# All rights reserved. This software is distributed under the +# Mozilla Public License 2.0 ( http://www.mozilla.org/MPL/2.0/ ) +# +# Redistributions of source code must retain the above copyright +# and license notice and the following restrictions and disclaimer. +# +# * Neither the name of DreamWorks Animation nor the names of +# its contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# IN NO EVENT SHALL THE COPYRIGHT HOLDERS' AND CONTRIBUTORS' AGGREGATE +# LIABILITY FOR ALL CLAIMS REGARDLESS OF THEIR BASIS EXCEED US$250.00. +# + +#-*-cmake-*- +# - Find GLEW +# +# Author : Nicholas Yue yue.nicholas@gmail.com +# +# This auxiliary CMake file helps in find the GLEW headers and libraries +# +# GLEW_FOUND set if Glew is found. +# GLEW_INCLUDE_DIR GLEW's include directory +# GLEW_glew_LIBRARY GLEW libraries +# GLEW_glewmx_LIBRARY GLEWmx libraries (Mulitple Rendering Context) + +FIND_PACKAGE ( PackageHandleStandardArgs ) + +FIND_PATH( GLEW_LOCATION include/GL/glew.h + "$ENV{GLEW_ROOT}" + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS ( GLEW + REQUIRED_VARS GLEW_LOCATION + ) + +IF ( GLEW_LOCATION ) + + SET( GLEW_INCLUDE_DIR "${GLEW_LOCATION}/include" CACHE STRING "GLEW include path") + + SET ( ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) + IF (GLEW_USE_STATIC_LIBS) + IF (APPLE) + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a") + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + # MESSAGE ( "APPLE STATIC" ) + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} ) + ELSEIF (WIN32) + # Link library + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib") + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32S PATHS ${GLEW_LOCATION}/lib ) + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32MXS PATHS ${GLEW_LOCATION}/lib ) + ELSE (APPLE) + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".a") + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + # MESSAGE ( "LINUX STATIC" ) + # MESSAGE ( "GLEW_LIBRARY_PATH = " ${GLEW_LIBRARY_PATH} ) + ENDIF (APPLE) + ELSE () + IF (APPLE) + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib") + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib ) + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib ) + ELSEIF (WIN32) + # Link library + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib") + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW32 PATHS ${GLEW_LOCATION}/lib ) + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEW32mx PATHS ${GLEW_LOCATION}/lib ) + # Load library + SET(CMAKE_FIND_LIBRARY_SUFFIXES ".dll") + FIND_LIBRARY ( GLEW_DLL_PATH GLEW32 PATHS ${GLEW_LOCATION}/bin + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + FIND_LIBRARY ( GLEWmx_DLL_PATH GLEW32mx PATHS ${GLEW_LOCATION}/bin + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + ELSE (APPLE) + # Unices + FIND_LIBRARY ( GLEW_LIBRARY_PATH GLEW PATHS ${GLEW_LOCATION}/lib + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + FIND_LIBRARY ( GLEWmx_LIBRARY_PATH GLEWmx PATHS ${GLEW_LOCATION}/lib + NO_DEFAULT_PATH + NO_SYSTEM_ENVIRONMENT_PATH + ) + ENDIF (APPLE) + ENDIF () + # MUST reset + SET(CMAKE_FIND_LIBRARY_SUFFIXES ${ORIGINAL_CMAKE_FIND_LIBRARY_SUFFIXES}) + + SET( GLEW_GLEW_LIBRARY ${GLEW_LIBRARY_PATH} CACHE STRING "GLEW library") + SET( GLEW_GLEWmx_LIBRARY ${GLEWmx_LIBRARY_PATH} CACHE STRING "GLEWmx library") + +ENDIF () diff --git a/FindGLUT.cmake b/FindGLUT.cmake new file mode 100644 index 0000000..25b2cbb --- /dev/null +++ b/FindGLUT.cmake @@ -0,0 +1,186 @@ +#.rst: +# FindGLUT +# -------- +# +# try to find glut library and include files. +# +# IMPORTED Targets +# ^^^^^^^^^^^^^^^^ +# +# This module defines the :prop_tgt:`IMPORTED` targets: +# +# ``GLUT::GLUT`` +# Defined if the system has GLUT. +# +# Result Variables +# ^^^^^^^^^^^^^^^^ +# +# This module sets the following variables: +# +# :: +# +# GLUT_INCLUDE_DIR, where to find GL/glut.h, etc. +# GLUT_LIBRARIES, the libraries to link against +# GLUT_FOUND, If false, do not try to use GLUT. +# +# Also defined, but not for general use are: +# +# :: +# +# GLUT_glut_LIBRARY = the full path to the glut library. +# GLUT_Xmu_LIBRARY = the full path to the Xmu library. +# GLUT_Xi_LIBRARY = the full path to the Xi Library. + +#============================================================================= +# Copyright 2001-2009 Kitware, Inc. +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +if (WIN32) + find_path( GLUT_INCLUDE_DIR NAMES GL/glut.h + PATHS $ENV{GLUT_ROOT_PATH}/include ) + + if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) + find_library( GLUT_glut_LIBRARY NAMES freeglut + PATHS + $ENV{GLUT_ROOT_PATH}/lib/x64 + + NO_DEFAULT_PATH + ) + else( CMAKE_SIZEOF_VOID_P EQUAL 8 ) + find_library( GLUT_glut_LIBRARY NAMES glut glut32 freeglut + PATHS + ${OPENGL_LIBRARY_DIR} + $ENV{GLUT_ROOT_PATH}/lib + ) + endif( CMAKE_SIZEOF_VOID_P EQUAL 8 ) + +else () + + if (APPLE) + find_path(GLUT_INCLUDE_DIR glut.h ${OPENGL_LIBRARY_DIR}) + find_library(GLUT_glut_LIBRARY GLUT DOC "GLUT library for OSX") + find_library(GLUT_cocoa_LIBRARY Cocoa DOC "Cocoa framework for OSX") + + if(GLUT_cocoa_LIBRARY AND NOT TARGET GLUT::Cocoa) + add_library(GLUT::Cocoa UNKNOWN IMPORTED) + # Cocoa should always be a Framework, but we check to make sure. + if(GLUT_cocoa_LIBRARY MATCHES "/([^/]+)\\.framework$") + set_target_properties(GLUT::Cocoa PROPERTIES + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}/${CMAKE_MATCH_1}") + else() + set_target_properties(GLUT::Cocoa PROPERTIES + IMPORTED_LOCATION "${GLUT_cocoa_LIBRARY}") + endif() + endif() + else () + + if (BEOS) + + set(_GLUT_INC_DIR /boot/develop/headers/os/opengl) + set(_GLUT_glut_LIB_DIR /boot/develop/lib/x86) + + else() + + find_library( GLUT_Xi_LIBRARY Xi + /usr/openwin/lib + ) + + find_library( GLUT_Xmu_LIBRARY Xmu + /usr/openwin/lib + ) + + if(GLUT_Xi_LIBRARY AND NOT TARGET GLUT::Xi) + add_library(GLUT::Xi UNKNOWN IMPORTED) + set_target_properties(GLUT::Xi PROPERTIES + IMPORTED_LOCATION "${GLUT_Xi_LIBRARY}") + endif() + + if(GLUT_Xmu_LIBRARY AND NOT TARGET GLUT::Xmu) + add_library(GLUT::Xmu UNKNOWN IMPORTED) + set_target_properties(GLUT::Xmu PROPERTIES + IMPORTED_LOCATION "${GLUT_Xmu_LIBRARY}") + endif() + + endif () + + find_path( GLUT_INCLUDE_DIR GL/glut.h + /usr/include/GL + /usr/openwin/share/include + /usr/openwin/include + /opt/graphics/OpenGL/include + /opt/graphics/OpenGL/contrib/libglut + ${_GLUT_INC_DIR} + ) + + find_library( GLUT_glut_LIBRARY glut + /usr/openwin/lib + ${_GLUT_glut_LIB_DIR} + ) + + unset(_GLUT_INC_DIR) + unset(_GLUT_glut_LIB_DIR) + + endif () + +endif () + +FIND_PACKAGE_HANDLE_STANDARD_ARGS(GLUT REQUIRED_VARS GLUT_glut_LIBRARY GLUT_INCLUDE_DIR) + +if (GLUT_FOUND) + # Is -lXi and -lXmu required on all platforms that have it? + # If not, we need some way to figure out what platform we are on. + set( GLUT_LIBRARIES + ${GLUT_glut_LIBRARY} + ${GLUT_Xmu_LIBRARY} + ${GLUT_Xi_LIBRARY} + ${GLUT_cocoa_LIBRARY} + ) + + if(NOT TARGET GLUT::GLUT) + add_library(GLUT::GLUT UNKNOWN IMPORTED) + set_target_properties(GLUT::GLUT PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${GLUT_INCLUDE_DIR}") + if(GLUT_glut_LIBRARY MATCHES "/([^/]+)\\.framework$") + set_target_properties(GLUT::GLUT PROPERTIES + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}/${CMAKE_MATCH_1}") + else() + set_target_properties(GLUT::GLUT PROPERTIES + IMPORTED_LOCATION "${GLUT_glut_LIBRARY}") + endif() + + if(TARGET GLUT::Xmu) + set_property(TARGET GLUT::GLUT APPEND + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xmu) + endif() + + if(TARGET GLUT::Xi) + set_property(TARGET GLUT::GLUT APPEND + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Xi) + endif() + + if(TARGET GLUT::Cocoa) + set_property(TARGET GLUT::GLUT APPEND + PROPERTY INTERFACE_LINK_LIBRARIES GLUT::Cocoa) + endif() + endif() + + #The following deprecated settings are for backwards compatibility with CMake1.4 + set (GLUT_LIBRARY ${GLUT_LIBRARIES}) + set (GLUT_INCLUDE_PATH ${GLUT_INCLUDE_DIR}) +endif() + +mark_as_advanced( + GLUT_INCLUDE_DIR + GLUT_glut_LIBRARY + GLUT_Xmu_LIBRARY + GLUT_Xi_LIBRARY + ) diff --git a/FindLAPACKE.cmake b/FindLAPACKE.cmake new file mode 100644 index 0000000..a94f1c2 --- /dev/null +++ b/FindLAPACKE.cmake @@ -0,0 +1,43 @@ +# - Try to find LAPACKE +# +# Once done this will define +# LAPACKE_FOUND - System has LAPACKE +# LAPACKE_INCLUDE_DIRS - The LAPACKE include directories +# LAPACKE_LIBRARIES - The libraries needed to use LAPACKE +# LAPACKE_DEFINITIONS - Compiler switches required for using LAPACKE +# +# Usually, LAPACKE requires LAPACK and the BLAS. This module does +# not enforce anything about that. + +find_path(LAPACKE_INCLUDE_DIR + NAMES lapacke.h + PATHS $ENV{LAPACK_PATH} ${INCLUDE_INSTALL_DIR} + PATHS ENV INCLUDE) + +find_library(LAPACKE_LIBRARY liblapacke lapacke + PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR} + PATHS ENV LIBRARY_PATH + PATHS ENV LD_LIBRARY_PATH) + +if(MSVC) + find_library(LAPACK_LIBRARY liblapack lapack + PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR} + PATHS ENV LIBRARY_PATH + PATHS ENV LD_LIBRARY_PATH) + + find_library(BLAS_LIBRARY libblas blas + PATHS $ENV{LAPACK_PATH} ${LIB_INSTALL_DIR} + PATHS ENV LIBRARY_PATH + PATHS ENV LD_LIBRARY_PATH) + +else() + find_library(LAPACK REQUIRED) + find_library(BLAS REQUIRED) +endif() +set(LAPACKE_LIBRARIES ${LAPACKE_LIBRARY} ${LAPACK_LIBRARY} ${BLAS_LIBRARY}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LAPACKE DEFAULT_MSG + LAPACKE_INCLUDE_DIR + LAPACKE_LIBRARIES) +mark_as_advanced(LAPACKE_INCLUDE_DIR LAPACKE_LIBRARIES) diff --git a/FindSTIM.cmake b/FindSTIM.cmake new file mode 100644 index 0000000..a83481a --- /dev/null +++ b/FindSTIM.cmake @@ -0,0 +1,22 @@ +# finds the STIM library (downloads it if it isn't present) +# set STIMLIB_PATH to the directory containing the stim subdirectory (the stim repository) + +include(FindPackageHandleStandardArgs) + +set(STIM_INCLUDE_DIR $ENV{STIMLIB_PATH}) + +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) + +if(STIM_FOUND) + set(STIM_INCLUDE_DIRS ${STIM_INCLUDE_DIR}) +elseif(STIM_FOUND) + #if the STIM library isn't found, download it + #file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/stimlib) #remove the stimlib directory if it exists + #set(STIM_GIT "https://git.stim.ee.uh.edu/codebase/stimlib.git") + #execute_process(COMMAND git clone --depth 1 ${STIM_GIT} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + #set(STIM_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/stimlib" CACHE TYPE PATH) + message("STIM library not found. Set the STIMLIB_PATH environment variable to the STIMLIB location.") + message("STIMLIB can be found here: https://git.stim.ee.uh.edu/codebase/stimlib") +endif(STIM_FOUND) + +find_package_handle_standard_args(STIM DEFAULT_MSG STIM_INCLUDE_DIR) diff --git a/src/basic_functions.h b/src/basic_functions.h new file mode 100644 index 0000000..6d51168 --- /dev/null +++ b/src/basic_functions.h @@ -0,0 +1,85 @@ +#include + + +size_t* sortIndx(float* input, size_t size){ + //sort indices of score in ascending order (fitness value) + size_t *idx; + idx = (size_t*) malloc (size * sizeof (size_t)); + for (size_t i = 0; i < size; i++) + idx[i] = i; + + for (size_t i=0; i +void mtxMul(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){ + //compute output matrix M3 of size row1 X column2 and data is column major + for(size_t i = 0 ; i +void mtxMultranspose(T* M3, T* M1, T* M2, size_t r1, size_t c1, size_t r2, size_t c2){ + //compute output matrix M3 of size row1 X column2 and data is column major + for(size_t i = 0 ; i +void displayS(T* sw, size_t f){ + + for(size_t g = 0; g<1; g++){ + std::cout< eigenvalue[idx[j]]){ + std::swap (idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error + } + } + } + + std::cout<<"best eigenvalue index: "< +#include +#include +#include +#include +//#include + +#define NOMINMAX + +//stim libraries +#include +#include +#include +#include +#include +//#include +#include + +std::vector< stim::image > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel# +//loads spectral features into a feature matrix based on a set of class images (or masks) +float* load_features(size_t nC, size_t tP, size_t B, stim::envi E, std::vector< unsigned int > nP){ + float progress = 0; //initialize the progress bar variable + unsigned long long bytes_fmat = sizeof(float) * tP * B; //calculate the number of bytes in the feature matrix + std::cout<<"totalnumber of samples "< nP){ + unsigned int* T = (unsigned int*)malloc(tP*sizeof(unsigned int)); //generate an OpenCV vector of responses + size_t R_idx = 0; //index into the response array + for(size_t c = 0; c < nC; c++){ //for each class image + for(unsigned long long l = 0; l < nP[c]; l++){ //assign a response for all pixels of class c loaded in the training matrix + T[R_idx + l] = (unsigned int)c+1; + } + R_idx += nP[c]; //increment the response vector index + } + return T; +} + + +//loads the necessary data for training a random forest classifier +std::vector< unsigned int > ga_load_class_images(int argc, stim::arglist args, size_t* nC, size_t* tP){ + if(args["classes"].nargs() < 2){ //if fewer than two classes are specified, there's a problem + std::cout<<"ERROR: training requires at least two class masks"< nP; + size_t num_images = args["classes"].nargs(); //count the number of class images + //size_t num_images = args["rf"].nargs(); //count the number of class images + //std::vector filenames(num_images); //initialize an array of file names to store the names of the images + std::string filename; //allocate space to store the filename for an image + for(size_t c = 0; c < num_images; c++){ //for each image + filename = args["classes"].as_string(c);; //get the class image file name + stim::image image(filename); //load the image + //push_training_image(image.channel(0), nC, tP, nP); //push channel zero (all class images are assumed to be single channel) + C.push_back(image.channel(0)); + unsigned int npixels = (unsigned int)image.channel(0).nnz(); + nP.push_back(npixels); //push the number of pixels onto the pixel array + *tP += npixels; //add to the running total of pixels + *nC = *nC + 1; + } + + return nP; +} + +void display_PixelfeatureNclass(float* F, unsigned int* T, size_t B, size_t Idx){ + //display code for debug, displaying Idx th pixel from feature matrix F with all features B + std::cout<<"class of pixel["< +//#include "cuda_runtime.h" +//#include +//#include "device_launch_parameters.h" +#include + +#include "timer.h" +//#include +//#include +#include +#include + +extern Timer timer; + + +__global__ void kernel_computeSb(float* gpuSb, unsigned int* gpuP, float* gpuM, float* gpuCM, size_t ub, size_t f, size_t p, size_t nC, unsigned int* gpu_nPxInCls){ + + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //gnomeindex in population matrix + size_t j = blockIdx.y * blockDim.y + threadIdx.y; //index of feature index from gnome + size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //if we use 3d grid then it is needed + + + if(gnomeIndx >= p || i >= f || j >= f) return; //handling segmentation fault + + //form a sb matrix from vector sbVec, multiply each element in matrix with num of pixels in the current class + //and add it to previous value of between class scatter matrix sb + float tempsbval; + size_t n1; + size_t n2; + size_t classIndx; //class index in class mean matrix + + for(size_t c = 0; c < nC; c++){ + tempsbval = 0; + classIndx = c * ub; + n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix + n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix + tempsbval = ((gpuCM[classIndx + n1] - gpuM[n1]) *(gpuCM[classIndx + n2] - gpuM[n2])) * (float)gpu_nPxInCls[c] ; + gpuSb[gnomeIndx * f * f + j * f + i] += tempsbval; + } +} + + +//Compute within class scatter sw (p x f x f) of all gnome features phe(tP x f) +__global__ void kernel_computeSw(float* gpuSw, unsigned int* gpuP, float* gpuCM, float* gpuF, unsigned int* gpuT, size_t ub, size_t f, size_t p, size_t nC, size_t tP){ + size_t i = blockIdx.x * blockDim.x + threadIdx.x; //gnomeindex in population matrix + size_t j = blockIdx.y * blockDim.y + threadIdx.y; //index of feature index from gnome + size_t gnomeIndx = blockIdx.z * blockDim.z + threadIdx.z; //total number of individuals + + if(gnomeIndx >= p || i >= f || j >= f) return; //handling segmentation fault + float tempswval; + + size_t n1 = gpuP[gnomeIndx * f + i]; //actual feature index in original feature matrix + size_t n2 = gpuP[gnomeIndx * f + j]; //actual feature index in original feature matrix + tempswval = 0; + for(size_t c = 0; c < nC; c++){ + tempswval = 0; + for(size_t k = 0; k < tP; k++){ + if(gpuT[k] == (c+1) ){ + tempswval += ((gpuF[ k * ub + n1] - gpuCM[c * ub + n1]) * (gpuF[k * ub + n2] - gpuCM[c * ub + n2])); + } + } + gpuSw[gnomeIndx * f * f + j * f + i] += tempswval; + } +} + + + + + //=============================gpu intialization============================================= + /// Initialize all GPU pointers used in the GA-GPU algorithm + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population + /// @param p is the population size + /// @param f is the number of desired features + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean + /// @param cpuM is a pointer to the class mean on the CPU + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices + /// @param gpuF is the destination for the GPU feature matrix + /// @param cpuF is the complete feature matrix on the CPU + + void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub, + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls, + float** gpuSb, float** gpuSw, + float** gpuF, float* cpuF, + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls){ + + HANDLE_ERROR(cudaMalloc(gpuP, p * f * sizeof(unsigned int))); //allocate space for the population on the GPU + + HANDLE_ERROR(cudaMalloc(gpuCM, nC * ub * sizeof(float))); //allocate space for the class mean and copy it to the GPU + HANDLE_ERROR(cudaMemcpy(*gpuCM, cpuCM, nC * ub * sizeof(float), cudaMemcpyHostToDevice)); + + + HANDLE_ERROR(cudaMalloc(gpuM, ub * sizeof(float))); //allocate space for the mean of the feature matrix + HANDLE_ERROR(cudaMemcpy(*gpuM, cpuM, ub * sizeof(float), cudaMemcpyHostToDevice)); + + HANDLE_ERROR(cudaMalloc(gpu_nPxInCls, nC * sizeof(unsigned int))); //number of pixels in each class + HANDLE_ERROR(cudaMemcpy(*gpu_nPxInCls, cpu_nPxInCls, nC * sizeof(unsigned int), cudaMemcpyHostToDevice)); + + + HANDLE_ERROR(cudaMalloc(gpuSb, p * f * f * sizeof(float))); //allocate memory for sb which is calculated for eery class separately and added together in different kernel + HANDLE_ERROR(cudaMalloc(gpuSw, p * f * f * sizeof(float))); + + HANDLE_ERROR(cudaMalloc(gpuF, tP * ub * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(*gpuF, cpuF, tP * ub * sizeof(float), cudaMemcpyHostToDevice)); + + HANDLE_ERROR(cudaMalloc(gpuT, tP * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemcpy(*gpuT, cpuT, tP* sizeof(unsigned int), cudaMemcpyHostToDevice)); + + } + + //computation on GPU + /// Initialize all GPU pointers used in the GA-GPU algorithm + /// @param gpuP is a pointer to GPU memory location, will point to memory space allocated for the population + /// @param p is the population size + /// @param f is the number of desired features + /// @param gpuSb is a GPU memory pointer to the between-class scatter matrices + /// @param cpuSb is the between-class scatter matrix on the GPU (this function will copy the GPU result there) + /// @param gpuSw is a GPU memory pointer to the within-class scatter matrices + /// @param cpuSw is the within-class scatter matrix on the GPU (this function will copy the GPU result there) + + /// @param gpuCM is a pointer to a GPU memory location, will point to the class mean + /// @param cpuM is a pointer to the class mean on the CPU + /// @param gpu_nPxInCls is a pointer to a GPU memory location storing the number of pixels in each class + /// @param gpu_nPxInCls is a CPU array storing the number of pixels in each class + + /// @param gpuF is the destination for the GPU feature matrix + /// @param cpuF is the complete feature matrix on the CPU + void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f, + float* gpuSb, float* cpuSb, + float* gpuSw, float* cpuSw, + float* gpuF, unsigned int* gpuT,float* gpuM, float* gpuCM, + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile){ + + timer.start(); + HANDLE_ERROR(cudaMemcpy(gpuP, cpuP, p * f * sizeof(unsigned int), cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemset(gpuSb, 0, p * f * f * sizeof(float))); + + //grid configuration of GPU + size_t threads = (size_t)sqrt(props.maxThreadsPerBlock); + if(threads > f) threads = f; + size_t numberofblocksfor_f = (size_t)ceil((float)f/ threads); + dim3 blockdim((int)threads, (int)threads, 1); + dim3 griddim((int)numberofblocksfor_f, (int)numberofblocksfor_f, (int)p); //X dimension blocks will cover all gnomes of the population and each block will have as many gnomes as it can feet + //sharedbytes calculation + size_t sharedBytes = p * f * sizeof(unsigned int); //copy population to shared memory + if(props.sharedMemPerBlock < sharedBytes) sharedBytes = props.sharedMemPerBlock; + + //launch kernel to compute sb matrix + kernel_computeSb<<>>(gpuSb, gpuP, gpuM, gpuCM, ub, f, p, nC, gpu_nPxInCls); + cudaDeviceSynchronize(); + + HANDLE_ERROR(cudaMemcpy(cpuSb, gpuSb, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); //copy between class scatter from gpu to cpu + const auto elapsedg1 = timer.time_elapsed(); + if(gen > gnrtn -2){ + std::cout << "Sb gpu time "<(elapsedg1).count() << "us" << std::endl; + profilefile << "Sb gpu time "<(elapsedg1).count() << "us" << std::endl; + } + + timer.start(); + //Compute within class scatter + HANDLE_ERROR(cudaMemset(gpuSw, 0, p * f * f * sizeof(float))); + + //launch kernel to compute sb matrix + kernel_computeSw<<>>(gpuSw, gpuP, gpuCM, gpuF, gpuT, ub, f, p, nC, tP); + cudaDeviceSynchronize(); + //copy between class scatter from gpu to cpu + HANDLE_ERROR(cudaMemcpy(cpuSw, gpuSw, p * f * f * sizeof(float), cudaMemcpyDeviceToHost)); + const auto elapsedg2 = timer.time_elapsed(); + if(gen > gnrtn - 2){ + std::cout << "Sw gpu time "<(elapsedg2).count() << "us" << std::endl; + profilefile<< "Sw gpu time "<(elapsedg2).count() << "us" << std::endl; + } + + } + + //free all gpu pointers + void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT){ + + HANDLE_ERROR(cudaFree(gpuP)); + HANDLE_ERROR(cudaFree(gpuCM)); + HANDLE_ERROR(cudaFree(gpuM)); + HANDLE_ERROR(cudaFree(gpu_nPxInCls)); + HANDLE_ERROR(cudaFree(gpuSb)); + HANDLE_ERROR(cudaFree(gpuSw)); + HANDLE_ERROR(cudaFree(gpuF)); + HANDLE_ERROR(cudaFree(gpuT)); + } + +#endif + diff --git a/src/ga_gpu.h b/src/ga_gpu.h new file mode 100644 index 0000000..e05bd6e --- /dev/null +++ b/src/ga_gpu.h @@ -0,0 +1,649 @@ +#ifndef GA_GPU_H +#define GA_GPU_H + +#include +#include +#include +#include +#include +#include +#include + +#include "timer.h" + +#include "basic_functions.h" +//LAPACKE support for Visual Studio + +#ifndef LAPACK_COMPLEX_CUSTOM +#define LAPACK_COMPLEX_CUSTOM +#define lapack_complex_float std::complex +#define lapack_complex_double std::complex +#include "lapacke.h" +#endif + + +#define LAPACK_ROW_MAJOR 101 +#define LAPACK_COL_MAJOR 102 + +//CUDA functions +void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation + float** gpuCM, float* cpuCM, size_t nC, unsigned int ub, + float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls, + float** gpuSb, float** gpuSw, + float** gpuF, float* cpuF, + unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls); +void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f, + float* gpuSb, float* cpuSb, + float* gpuSw, float* cpuSw, + float* gpuF, unsigned int* T, float* gpuM, float* gpuCM, + size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile); +void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT); + +struct _fcomplex { float re, im; }; +typedef struct _fcomplex fcomplex; + +Timer timer; + +class ga_gpu { + +public: + float* F; //pointer to the raw data in host memory + unsigned int* T; //pointer to the class labels in host memory + size_t gnrtn; //total number of generations + size_t p; //population size + size_t f; // number of features to be selected + + unsigned int* P; //pointer to population of current generation genotype matrix (p x f) + float* S; //pointer to score(fitness value) of each gnome from current population matric P + unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population + unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F) + unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0) + float uniformRate; + float mutationRate; + size_t tournamentSize; //number of potential gnomes to select parent for crossover + bool elitism; //if true then passes best gnome to next generation + + //declare gpu pointers + float* gpuF; //Feature matrix + unsigned int* gpuT; //target responses of entire feature matrix + unsigned int* gpuP; //population matrix + unsigned int* gpu_nPxInCls; + float* gpuCM; //class mean of entire feature matrix + float* gpuM; //total mean of entire feature matrix + float* gpuSb; //between class scatter for all individuals of current population + float* gpuSw; //within class scatter for all individuals of current population + + //constructor + ga_gpu() {} + + //==============================generate initial population + + void initialize_population(std::vector i_guess, bool debug) { + if (debug) { + std::cout << std::endl; + std::cout << "initial populatyion is: " << std::endl; + } + + lb = 0; + P = (unsigned int*)calloc(p * f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f + S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P + + srand(1); + //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess + std::memcpy(P, i_guess.data(), f * sizeof(unsigned int)); + + //generate random initial population + for (size_t i1 = 1; i1 < p; i1++) { + for (size_t i2 = 0; i2 < f; i2++) { + P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B) + if (debug) std::cout << P[i1 * f + i2] << "\t"; + } + if (debug) std::cout << std::endl; + } + } + + //===================generation of new population========================================== + + size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) { + + //gget index of best gnome in the current population + size_t bestG_Indx = gIdxbestGnome(); + //-------------(reproduction)------- + if (elitism) { + saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation + } + // ------------Crossover population--------------- + int elitismOffset; + if (elitism) { + elitismOffset = 1; + } + else { + elitismOffset = 0; + } + + //Do crossover for rest of population size + for (int i = elitismOffset; i gnome1; + gnome1.reserve(f); + gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes + // displaygnome(gnome1); + std::vectorgnome2; + gnome2.reserve(f); + gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes + // displaygnome(gnome2); + std::vectorgnome; + gnome.reserve(f); + gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome + // displaygnome(gnome); + saveGnome(i, gnome, newPop); //save crosseover result to new population + } + + //--------------Mutate population------------ + // introduce some mutation in new population + for (int i = elitismOffset; i gnome; + gnome.reserve(f); + + for (size_t n = 0; n < f; n++) + gnome.push_back(newPop[i*f + n]); + //std::cout<<"\n starting address "<<(&newPop[0] + i*f)<<"\t end address is "<<(&newPop[0] + i*f + f-1) < tournamentSelection(size_t tSize) { + // Create a tournament population + unsigned int* tournamentP = (unsigned int*)malloc(tSize * f * sizeof(unsigned int)); + std::vectortournamentS; + + // For each place in the tournament get a random individual + for (size_t i = 0; i < tSize; i++) { + size_t rndmIdx = rand() % p + lb; + tournamentS.push_back(S[rndmIdx]); + //for (size_t n = 0; n temp_g(getGnome(rndmIdx)); + std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f); + } + // Get the fittest + std::vectorfittestgnome; + fittestgnome.reserve(f); + + //select index of best gnome from fitness score + size_t bestSIdx = 0; + for (size_t i = 0; i < tSize; i++) { + if (tournamentS[i] < tournamentS[bestSIdx]) + bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error + } + + for (size_t n = 0; n < f; n++) + fittestgnome.push_back(tournamentP[bestSIdx * f + n]); + return fittestgnome; + } //end of tournament selection + + + std::vector crossover(std::vector gnome1, std::vector gnome2, float* M) { + std::vector gnome; + for (size_t i = 0; i < f; i++) { + // Crossover + float r = static_cast (rand()) / static_cast (RAND_MAX); + if (r <= uniformRate) { + gnome.push_back(gnome1.at(i)); + } + else { + gnome.push_back(gnome2.at(i)); + } + } + + //check new gnome for all zero bands and duplicated values + std::vector gnomeunique; + int flag = 0; + std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); + /* if(gnomeunique.size()< gnome.size()){ + flag = 1; + std::cout<<"gnome:["< gnome) { + for (size_t i = 0; i < f; i++) { + float LO = (float)0.01; + float HI = 1; + float r3 = LO + static_cast (rand()) / (static_cast (RAND_MAX / (HI - LO))); + //if random value is less than mutationRate then mutate this gnome + if (r3 <= mutationRate) { + gnome.at(i) = (rand() % ub + lb); + gnome.push_back(rand() % ub + lb); + } + } + } + + ///returns gnome of given index + std::vector getGnome(size_t idx) { + std::vector gnome; + gnome.reserve(f); + //pulling gnome idx from population P + for (size_t n = 0; n < f; n++) + gnome.push_back(P[idx * f + n]); + //memcpy(&gnome[0], P+idx*f, f*sizeof(size_t)); + return gnome; + } + + //save gnome of index gIdx from previous population at position i in the new population + void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) { + for (size_t n = 0; n < f; n++) + newPop[i * f + n] = P[gIdx * f + n]; + } + + void saveGnome(size_t idx, std::vectorgnome, unsigned int* newPop) { + std::copy(gnome.begin(), gnome.end(), newPop + idx*f); + } + + size_t gIdxbestGnome() { + //std::cout<<"best gnome indes is: "< gnome) { + std::cout << "\t gnome: "; + for (int i = 0; i S[idx[j]]) { + std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error + } + } + } + + //display best gnome + //std::cout << "best fitness value: " << S[idx[0]] << std::endl; + /*if (S[idx[0]] < 0) { + std::cout << "best gnome is " << std::endl; + for (size_t i = 0; i < f; i++) + std::cout << P[f * idx[0] + i] << ", "; + std::cout << std::endl; + }*/ + + return idx; //use as sortSIdx in selection + } + + + //size_t* sortIndx(float* input, size_t size) { + // //sort indices of score in ascending order (fitness value) + // size_t *idx; + // idx = (size_t*)malloc(size * sizeof(size_t)); + // for (size_t i = 0; i < size; i++) + // idx[i] = i; + + // for (size_t i = 0; i nPxInCls) { + for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses + float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c' + for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix + if (T[k] == c + 1) { //class numbers start from 1 not 0 + for (size_t n = 0; n < B; n++) { //total number of features in a gnome + tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome + } + } + } + for (size_t n = 0; n < B; n++) + cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c' + + } + + } + + //display class mean + void dispalyClassmean(float* cM, size_t nC) { + std::cout << std::endl; + std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl; + for (size_t i = 0; i < 1; i++) { + for (size_t c = 0; c < nC; c++) { + for (size_t j = 0; j < f; j++) { + size_t index = P[i*f + j]; + + std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl; + } + } + } + std::cout << std::endl; + } + + //-----------------------------------------between and within class Scattering computation--------------------------------------------------------------- + //computation on CPU + void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector nPxInCls) { + timer.start(); + computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU + const auto elapsed = timer.time_elapsed(); + std::cout << "Sb CPU time " << std::chrono::duration_cast(elapsed).count() << "us" << std::endl; + + timer.start(); + computeSw(sw, cM, nC, tP); //compute within class scatter on CPU + const auto elapsed1 = timer.time_elapsed(); + std::cout << "Sw CPU time " << std::chrono::duration_cast(elapsed1).count() << "us" << std::endl; + } + + //display between class scatter + void displaySb(float* sb) { + std::cout << "between scatter is " << std::endl; + for (size_t g = 0; g<1; g++) { + std::cout << std::endl; + for (size_t j = 0; j < f; j++) { //total number of features in a gnome + for (size_t k = 0; k < f; k++) { //total number of features in a gnome + std::cout << sb[g * f * f + j * f + k] << " "; + } + std::cout << std::endl; + } + } + std::cout << std::endl; + } + + //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f) + void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector nPxInCls) { + float tempsbval; + size_t n1; + size_t n2; + size_t classIndx; //class index in class mean matrix + /*std::cout <<"population of computation of cpusb "<< std::endl; + for (size_t i2 = 0; i2 < f; i2++) { + std::cout << P[i2] << "\t"; + }*/ + + for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) { + for (size_t c = 0; c < nC; c++) { + for (size_t i = 0; i < f; i++) { + for (size_t j = 0; j < f; j++) { + tempsbval = 0; + classIndx = c * ub; + n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix + n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix + // std::cout << "i: " << i << " j: " < gnome = getGnome(g); + // std::vector gnomeunique; + // int flag = 0; //flag will be set if gnome has duplicated band (feature) index + // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 + // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies + // if (gnomeunique.size()< gnome.size()) { + // flag = 1; //set flag for those if there are duplicated indices + // //std::cout<<"gnome:["< gnome = getGnome(g); //get current gnome g from population matrix P + std::vector gnomeunique; //array to store only unique band indicies in a genome + int flag = 0; //flag will be set if gnome has duplicated band (feature) index + std::sort(gnome.begin(), gnome.end()); //sort current gnome + std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome + if (gnomeunique.size()< gnome.size()) { + flag = 1; //set flag for those if there are duplicated indices + //std::cout<<"gnome:["<cpu_nPxInCls, size_t tP, size_t nC) { + // call gpuInitialization(......) with all of the necessary parameters + gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]); + + } + + //Computation of between class scatter and within class scatter in GPU + void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) { + //calling function for SW and Sb computation and passing necessary arrays for computation + // std::cout<<"gpu function calling"< + +//stim libraries +#include +#include +#include +#include +#include +#include +//input arguments +stim::arglist args; +#include +#include +#include +#include +#include +#include + +#define NOMINMAX + + + +//GA +#include "ga_gpu.h" +#include "enviload.h" + + +//envi input file and associated parameters +stim::envi E; //ENVI binary file object +unsigned int B; //shortcuts storing the spatial and spectral size of the ENVI image +//mask and class information used for training +//std::vector< stim::image > C; //2D array used to access each mask C[m][p], where m = mask# and p = pixel# +std::vector nP; //array holds the number of pixels in each mask: nP[m] is the number of pixels in mask m +size_t nC = 0; //number of classes +size_t tP = 0; //total number of pixels in all masks: tP = nP[0] + nP[1] + ... + nP[nC] +float* fea; + +//ga_gpu class object +ga_gpu ga; +bool debug; +bool binaryClass; +int binClassOne; + +//creating struct to pass to thread functions as it limits number of arguments to 3 +typedef struct { + float* S; + float* Sb; + float* Sw; + float* lda; +}gnome; +gnome gnom; + + +void gpuComputeEignS( size_t g, size_t fea){ + //eigen value computation will return r = (nC-1) eigen vectors so new projected data will have dimension of r rather than f + // std::thread::id this_id = std::this_thread::get_id(); + // std::cout<<"thread id is "<< this_id< features = ga.getGnome(g); + std::vector featuresunique; + int flag = 0; + std::sort(features.begin(), features.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 + std::unique_copy(features.begin(), features.end(), std::back_inserter(featuresunique)); + if(featuresunique.size()< features.size()){ + f = featuresunique.size(); + } + + size_t r = nC-1; //LDA projected dimension (limited to number of classes - 1 by rank) + if(r > f){ + r = f; + } + + int info; + float* EigenvaluesI_a = (float*)malloc(f * sizeof(float)); + float* Eigenvalues_a = (float*)malloc(f * sizeof(float)); + int *IPIV = (int*) malloc(sizeof(int) * f); + //computing inverse of matrix Sw + memset(IPIV, 0, f * sizeof(int)); + LAPACKE_sgetrf(LAPACK_COL_MAJOR, (int)f, (int)f, gSw_a, (int)f, IPIV); + // DGETRI computes the inverse of a matrix using the LU factorization computed by DGETRF. + LAPACKE_sgetri(LAPACK_COL_MAJOR, (int)f, gSw_a, (int)f, IPIV); + + float* gSbSw_a = (float*)calloc(f * f, sizeof(float)); + //mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f * sizeof(float)], f, f, f,f); + mtxMul(gSbSw_a, gSw_a, &gnom.Sb[g * f * f], f, f, f,f); + if(debug){ + std::cout<<"From Eigen function: inverse of sw and ratio of sb and sw (Sb/Sw)"; + displayS(gSw_a, f); //display inverse of Sw (1/Sw) + displayS(gSbSw_a, f); //display ratio of Sb and Sw (Sb/Sw) + } + + //compute left eigenvectors for current gnome from ratio of between class scatter and within class scatter: Sb/Sw + info = LAPACKE_sgeev(LAPACK_COL_MAJOR, 'V', 'N', (int)f, gSbSw_a, (int)f, Eigenvalues_a, EigenvaluesI_a, LeftEigVectors_a, (int)f, 0, (int)f); + //sort eignevalue indices in descending order + size_t* sortedindx = sortIndx(Eigenvalues_a, f); + //displayS(LeftEigVectors_a, f); //display Eignevectors (Note these are -1 * matlab eigenvectors does not change fitness score results but keep in mind while projecting data on it) + //sorting left eigenvectors (building forward transformation matrix As) + for (size_t rowE = 0; rowE < r; rowE++){ + for (size_t colE = 0; colE < f; colE++){ + size_t ind1 = g * r * f + rowE * f + colE; + //size_t ind1 = rowE * f + colE; + size_t ind2 = sortedindx[rowE] * f + colE; //eigenvector as row vector + gnom.lda[ind1] = LeftEigVectors_a[ind2]; + } + } + + if(debug){ + std::cout<<"Eigenvalues are"< threads; + //for (size_t g = 0; g ga.gnrtn - 2){ + std::cout << "gpu_eigen time "<(elapsed1).count() << "us" << std::endl; + profilefile<< "gpu_eigen time "<(elapsed1).count() << "us" << std::endl; + } + //ga.S = gnom.score; + //size_t bestGnomeIdx = ga.sortSIndx()[0]; + +}//end of fitness function + +void binaryclassifier(int classnum){ + unsigned int* target = (unsigned int*) calloc(tP, sizeof(unsigned int)); + memcpy(target, ga.T, tP * sizeof(unsigned int)); + for(int i = 0 ; i < tP; i++){ + if(target[i]==classnum){ + ga.T[i] = 1; + + }else + ga.T[i] = 0; + } +} + + + +void advertisement() { + std::cout << std::endl; + std::cout << "=========================================================================" << std::endl; + std::cout << "Thank you for using the GA-GPU features selection for spectroscopic image!" << std::endl; + std::cout << "=========================================================================" << std::endl << std::endl; +// std::cout << args.str(); +} + +int main(int argc, char* argv[]){ + +//Add the argument options and set some of the default parameters + args.add("help", "print this help"); + args.section("Genetic Algorithm"); + args.add("features", "select features selection algorithm parameters","10", "number of features to be selected"); + args.add("classes", "image masks used to specify classes", "", "class1.bmp class2.bmp class3.bmp"); + args.add("population", "total number of feature subsets in puplation matrix", "1000"); + args.add("generations", "number of generationsr", "50"); + args.add("initial_guess", "initial guess of featues", ""); + args.add("debug", "display intermediate data for debugging"); + args.add("binary", "Select features for binary classes", ""); + args.add("trim", "this gives wavenumber to use in trim option of siproc which trims all bands from envi file except gagpu selected bands"); + + args.parse(argc,argv); //parse the command line arguments + +//Print the help text if set + if(args["help"].is_set()){ //display the help text if requested + advertisement(); + std::cout< wavelengths = E.header.wavelength; //wavelengths of each band + + if(E.header.interleave != stim::envi_header::BIP){ //this code can only load bip files and hence check that in header file + std::cout<<"this code works for only bip files. please convert file to bip file"< i_guess(ga.f); + for (size_t b = 0; b < ga.f; b++) //generate default initial guess + i_guess[b] = rand() % B + 0; + + if (args["initial_guess"].is_set()) {//if the user specifies the --initialguess option & provides feature indices as initial guess + size_t nf = args["initial_guess"].nargs(); //get the number of arguments after initial_guess + if (nf == 1 || nf == ga.f) { //check if file with initial guessed indices is given or direct indices are given as argument + if (nf == 1) { //if initial guessed feature indices are given in file + std::ifstream in; //open the file containing the baseline points + in.open(args["initial_guess"].as_string().c_str()); + if (in.is_open()){ //if file is present and can be opened then read it + unsigned int b_ind; + while (in >> b_ind) //get each band index and push it into the vector + i_guess.push_back(b_ind); + } + else + std::cout << "cannot open file of initial_guess indices" << std::endl; + } + else if (nf == ga.f) { //if direct indices are given as argument + for (size_t b = 0; b < nf; b++) //for each band given by the user + i_guess[b] = args["initial_guess"].as_int(b); //store that band in the i_guess array + } + } + } + + ga.initialize_population(i_guess, debug); //initialize first population set for first generation, user can pass manually preferred features from command line + //display_gnome(0); + +//------------------Calculate class means and total mean of features---------------------------- + float* M = (float*)calloc( B , sizeof(float)); //total mean of entire feature martix for all features (bands B) + ga.ttlMean(M, tP, B); //calculate total mean, ga.F is entire feature matrix, M is mean for all bands B(features) + if(debug) ga.dispalymean(M); //if option --debug is used display all bands mean + + //display band index of bands with mean zero, this indicates that band has all zero values + std::cout<<"Display features indices with zero mean "< bestgnome; //holds best gnome after each generation evaluation + size_t bestG_Indx; //This gives index of best gnome in the current population to get best gnome and its fitness value + unsigned int* newPop = (unsigned int*) calloc(ga.p * ga.f, sizeof(unsigned int)); //temprory storage of new population + double* best_S = (double*) calloc (ga.gnrtn, sizeof(double)); //stores fitness value of best gnome at each iteration + float* lda = (float*) calloc (ga.p * (nC-1) * ga.f, sizeof(float)); //stores LDA basis for each gnome so that we can have best gnome's LDA basis + float* sb = (float*) calloc( ga.p * ga.f * ga.f , sizeof(float)) ; //3d matrix for between class scatter (each 2d matrix between class scatter for one gnome) + float* sw = (float*) calloc( ga.p * ga.f * ga.f , sizeof(float)) ; //3d matrix for within class scatter (each 2d matrix within class scatter for one gnome) + ga.zerobandcheck(M, true); //checking bands with all zeros and duplicated bands in a gnome replacing them with other bands avoiding duplication and zero mean + ga.zerobandcheck(M, true); //Repeating zeroband cheack as some of these bands are not replaced in previous run and gave random results + time_t gpu_timestart = time(NULL); //start a timer for total evoluation + + for (size_t gen = 0; gen < ga.gnrtn; gen++){ //for each generation find fitness value of all gnomes in population matrix and generate population for next generation + //std::cout<<"Generation: "<ga.gnrtn -2){ + std::cout << "population evolution time "<(pop_generation).count() << "us" << std::endl; + profilefile<<"population evolution time "<(pop_generation).count() << "us" < trimindex(ga.f); //create a vector to store temprory trim index bounds + std::vector finaltrim_ind; //create a vector to store final trim index bounds + std::vector trim_wv; //create a vector to store final trim wavelength bounds + + //trim index + trimindex.push_back(1); //1st trimming band index is 1 + for (size_t i = 0; i < ga.f; i++) { // for each feature find its bound indexes + trimindex[i * 2] = bestgnome.at(i) - 1; + trimindex[i * 2 + 1] = bestgnome.at(i) + 1; + } + trimindex.push_back(B); //last bound index is B + + //organize trim index + int k = 0; + for (size_t i = 0; i < ga.f + 1; i++) { // find valid pair of trim indices bound excluding adjacent trim indices + if (trimindex[2 * i] < trimindex[2 * i + 1]) { + finaltrim_ind.push_back(trimindex[2 * i]); //this is left bound + finaltrim_ind.push_back(trimindex[2 * i + 1]); + k = k + 2; + } + } + //add duplicated trim indices as single index to final trim index + for (size_t i = 0; i < ga.f + 1; i++) { //check each pair of trim indices for duplications + if (trimindex[2 * i] == trimindex[2 * i + 1]) { // (duplication caused due to adjacent features) + finaltrim_ind.push_back(trimindex[2 * i]); // remove duplicated trim indices replace by one + k = k + 1; + } + } + + + ////output actual wavelenths corresponding to those trim indices + ////these wavenumber are grouped in pairs, check each pair, if duplicated numbers are there in pair delete one and keep other as band to trim, if 2nd wavnumber is smaller than 1st in a pair ignore that pair + ////e.g [1, 228, 230, 230, 232, 350,352, 351, 353, 1200] pairas [1(start)-228,230-230, 232-350, 352-351, 353-1200(end)], trimming wavenumbers are [1-228, 230, 233-350, 353-1200] + csv << (wavelengths[finaltrim_ind.at(0)]); + for (size_t i = 1; i < ga.f * + 2 ; i++) + csv << "," << (wavelengths[finaltrim_ind.at(i)]); + csv << std::endl; + } //end trim option + + + //free gpu pointers + ga.gpu_Destroy(); + + //free pointers + std::free(sb); + std::free(sw); + std::free(M); + std::free(cM); + std::free(best_S); + std::free(lda); + std::free(newPop); + +}//end main + + diff --git a/src/timer.h b/src/timer.h new file mode 100644 index 0000000..a482858 --- /dev/null +++ b/src/timer.h @@ -0,0 +1,37 @@ +#ifndef GA_GPU_TIMER_H +#define GA_GPU_TIMER_H + +#include +//#include +//#include + +//using namespace std::chrono; + +class Timer { +typedef std::chrono::high_resolution_clock Clock; + +Clock::time_point epoch; +public: + void start(){ + epoch = Clock::now(); + } + Clock::duration time_elapsed() const{ + return Clock::now() - epoch; + } +}; + +#endif + + +//class Timer { +// std::chrono::time_point epoch; +// +// public: +// typedef high_resolution_clock Clock; +// void start(){ +// epoch = Clock::now(); +// } +// std::chrono::time_point time_elapsed() const { +// return Clock::now() - epoch; +// } +//}; \ No newline at end of file -- libgit2 0.21.4