#ifndef GA_GPU_H #define GA_GPU_H #include #include #include #include #include #include #include #include "timer.h" #include "basic_functions.h" //LAPACKE support for Visual Studio #ifndef LAPACK_COMPLEX_CUSTOM #define LAPACK_COMPLEX_CUSTOM #define lapack_complex_float std::complex #define lapack_complex_double std::complex #include "lapacke.h" #endif #define LAPACK_ROW_MAJOR 101 #define LAPACK_COL_MAJOR 102 //CUDA functions void gpuIntialization(unsigned int** gpuP, size_t p, size_t f, //variables required for the population allocation float** gpuCM, float* cpuCM, size_t nC, unsigned int ub, float** gpuM, float* cpuM, unsigned int** gpu_nPxInCls, float** gpuSb, float** gpuSw, float** gpuF, float* cpuF, unsigned int** gpuT, unsigned int* cpuT, size_t tP, unsigned int* cpu_nPxInCls); void gpucomputeSbSw(unsigned int* gpuP, unsigned int* cpuP, size_t p, size_t f, float* gpuSb, float* cpuSb, float* gpuSw, float* cpuSw, float* gpuF, unsigned int* T, float* gpuM, float* gpuCM, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, size_t gnrtn, size_t ub, unsigned int* gpu_nPxInCls, std::ofstream& profilefile); void gpuDestroy(unsigned int* gpuP, float* gpuCM, float* gpuM, unsigned int* gpu_nPxInCls, float* gpuSb, float* gpuSw, float* gpuF, unsigned int* gpuT); struct _fcomplex { float re, im; }; typedef struct _fcomplex fcomplex; Timer timer; class ga_gpu { public: float* F; //pointer to the raw data in host memory unsigned int* T; //pointer to the class labels in host memory size_t gnrtn; //total number of generations size_t p; //population size size_t f; // number of features to be selected unsigned int* P; //pointer to population of current generation genotype matrix (p x f) float* S; //pointer to score(fitness value) of each gnome from current population matric P unsigned int* i_guess; //initial guess of features if mentioined in args add to initial population unsigned int ub; //upper bound for gnome value (maximum feature index from raw feature matrix F) unsigned int lb; //lower bound for gnome value (minimum feature index from raw feature matrix F = 0) float uniformRate; float mutationRate; size_t tournamentSize; //number of potential gnomes to select parent for crossover bool elitism; //if true then passes best gnome to next generation //declare gpu pointers float* gpuF; //Feature matrix unsigned int* gpuT; //target responses of entire feature matrix unsigned int* gpuP; //population matrix unsigned int* gpu_nPxInCls; float* gpuCM; //class mean of entire feature matrix float* gpuM; //total mean of entire feature matrix float* gpuSb; //between class scatter for all individuals of current population float* gpuSw; //within class scatter for all individuals of current population //constructor ga_gpu() {} //==============================generate initial population void initialize_population(std::vector i_guess, bool debug) { if (debug) { std::cout << std::endl; std::cout << "initial populatyion is: " << std::endl; } lb = 0; P = (unsigned int*)calloc(p * f, sizeof(unsigned int)); //allcate memory for genetic population(indices of features from F), p number of gnomes of size f S = (float*)calloc(p, sizeof(float)); //allcate memory for scores(fitness value) of each gnome from P srand(1); //add intial guess to the population if specified by user as a output of other algorithm or by default just random guess std::memcpy(P, i_guess.data(), f * sizeof(unsigned int)); //generate random initial population for (size_t i1 = 1; i1 < p; i1++) { for (size_t i2 = 0; i2 < f; i2++) { P[i1 * f + i2] = rand() % ub + lb; //select element of gnome as random feature index within lower bound(0) and upper bound(B) if (debug) std::cout << P[i1 * f + i2] << "\t"; } if (debug) std::cout << std::endl; } } //===================generation of new population========================================== size_t evolvePopulation(unsigned int* newPop, float* M, bool debug) { //gget index of best gnome in the current population size_t bestG_Indx = gIdxbestGnome(); //-------------(reproduction)------- if (elitism) { saveGnomeIdx(0, bestG_Indx, newPop); //keep best gnome from previous generation to new generation } // ------------Crossover population--------------- int elitismOffset; if (elitism) { elitismOffset = 1; } else { elitismOffset = 0; } //Do crossover for rest of population size for (int i = elitismOffset; i gnome1; gnome1.reserve(f); gnome1 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes // displaygnome(gnome1); std::vectorgnome2; gnome2.reserve(f); gnome2 = tournamentSelection(5); //select first parent for crossover from tournament selection of 5 gnomes // displaygnome(gnome2); std::vectorgnome; gnome.reserve(f); gnome = crossover(gnome1, gnome2, M); //Do crossover of above parent gnomes to produce new gnome // displaygnome(gnome); saveGnome(i, gnome, newPop); //save crosseover result to new population } //--------------Mutate population------------ // introduce some mutation in new population for (int i = elitismOffset; i gnome; gnome.reserve(f); for (size_t n = 0; n < f; n++) gnome.push_back(newPop[i*f + n]); //std::cout<<"\n starting address "<<(&newPop[0] + i*f)<<"\t end address is "<<(&newPop[0] + i*f + f-1) < tournamentSelection(size_t tSize) { // Create a tournament population unsigned int* tournamentP = (unsigned int*)malloc(tSize * f * sizeof(unsigned int)); std::vectortournamentS; // For each place in the tournament get a random individual for (size_t i = 0; i < tSize; i++) { size_t rndmIdx = rand() % p + lb; tournamentS.push_back(S[rndmIdx]); //for (size_t n = 0; n temp_g(getGnome(rndmIdx)); std::copy(temp_g.begin(), temp_g.end(), tournamentP + i*f); } // Get the fittest std::vectorfittestgnome; fittestgnome.reserve(f); //select index of best gnome from fitness score size_t bestSIdx = 0; for (size_t i = 0; i < tSize; i++) { if (tournamentS[i] < tournamentS[bestSIdx]) bestSIdx = i; //float check : it was like this b(&idx[i], &idx[j]) but gave me error } for (size_t n = 0; n < f; n++) fittestgnome.push_back(tournamentP[bestSIdx * f + n]); return fittestgnome; } //end of tournament selection std::vector crossover(std::vector gnome1, std::vector gnome2, float* M) { std::vector gnome; for (size_t i = 0; i < f; i++) { // Crossover float r = static_cast (rand()) / static_cast (RAND_MAX); if (r <= uniformRate) { gnome.push_back(gnome1.at(i)); } else { gnome.push_back(gnome2.at(i)); } } //check new gnome for all zero bands and duplicated values std::vector gnomeunique; int flag = 0; std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); /* if(gnomeunique.size()< gnome.size()){ flag = 1; std::cout<<"gnome:["< gnome) { for (size_t i = 0; i < f; i++) { float LO = (float)0.01; float HI = 1; float r3 = LO + static_cast (rand()) / (static_cast (RAND_MAX / (HI - LO))); //if random value is less than mutationRate then mutate this gnome if (r3 <= mutationRate) { gnome.at(i) = (rand() % ub + lb); gnome.push_back(rand() % ub + lb); } } } ///returns gnome of given index std::vector getGnome(size_t idx) { std::vector gnome; gnome.reserve(f); //pulling gnome idx from population P for (size_t n = 0; n < f; n++) gnome.push_back(P[idx * f + n]); //memcpy(&gnome[0], P+idx*f, f*sizeof(size_t)); return gnome; } //save gnome of index gIdx from previous population at position i in the new population void saveGnomeIdx(size_t i, size_t gIdx, unsigned int* newPop) { for (size_t n = 0; n < f; n++) newPop[i * f + n] = P[gIdx * f + n]; } void saveGnome(size_t idx, std::vectorgnome, unsigned int* newPop) { std::copy(gnome.begin(), gnome.end(), newPop + idx*f); } size_t gIdxbestGnome() { //std::cout<<"best gnome indes is: "< gnome) { std::cout << "\t gnome: "; for (int i = 0; i S[idx[j]]) { std::swap(idx[i], idx[j]); //float check : it was like this b(&idx[i], &idx[j]) but gave me error } } } //display best gnome //std::cout << "best fitness value: " << S[idx[0]] << std::endl; /*if (S[idx[0]] < 0) { std::cout << "best gnome is " << std::endl; for (size_t i = 0; i < f; i++) std::cout << P[f * idx[0] + i] << ", "; std::cout << std::endl; }*/ return idx; //use as sortSIdx in selection } //size_t* sortIndx(float* input, size_t size) { // //sort indices of score in ascending order (fitness value) // size_t *idx; // idx = (size_t*)malloc(size * sizeof(size_t)); // for (size_t i = 0; i < size; i++) // idx[i] = i; // for (size_t i = 0; i nPxInCls) { for (size_t c = 0; c < nC; c++) { //index of class feature matrix responses float* tempcM = (float*)calloc(B, sizeof(float)); //tempcM holds classmean vector for current gnome 'i', class 'c' for (size_t k = 0; k < tP; k++) { //total number of pixel in feature matrix if (T[k] == c + 1) { //class numbers start from 1 not 0 for (size_t n = 0; n < B; n++) { //total number of features in a gnome tempcM[n] += F[k * B + n]; //add phe value for feature n of class 'c' in ith gnome } } } for (size_t n = 0; n < B; n++) cM[c * B + n] = tempcM[n] / (float)nPxInCls[c]; //divide by number of pixels from class 'c' } } //display class mean void dispalyClassmean(float* cM, size_t nC) { std::cout << std::endl; std::cout << "class mean of gnome 1 with total classes " << nC << " is :" << std::endl; for (size_t i = 0; i < 1; i++) { for (size_t c = 0; c < nC; c++) { for (size_t j = 0; j < f; j++) { size_t index = P[i*f + j]; std::cout << "class index: " << c << "\t feature index " << index << "\t class mean " << cM[c * ub + index] << std::endl; } } } std::cout << std::endl; } //-----------------------------------------between and within class Scattering computation--------------------------------------------------------------- //computation on CPU void cpu_computeSbSw(float* sb, float* sw, float* M, float* cM, size_t nC, size_t tP, std::vector nPxInCls) { timer.start(); computeSb(sb, M, cM, nC, nPxInCls); //compute between class scatter on CPU const auto elapsed = timer.time_elapsed(); std::cout << "Sb CPU time " << std::chrono::duration_cast(elapsed).count() << "us" << std::endl; timer.start(); computeSw(sw, cM, nC, tP); //compute within class scatter on CPU const auto elapsed1 = timer.time_elapsed(); std::cout << "Sw CPU time " << std::chrono::duration_cast(elapsed1).count() << "us" << std::endl; } //display between class scatter void displaySb(float* sb) { std::cout << "between scatter is " << std::endl; for (size_t g = 0; g<1; g++) { std::cout << std::endl; for (size_t j = 0; j < f; j++) { //total number of features in a gnome for (size_t k = 0; k < f; k++) { //total number of features in a gnome std::cout << sb[g * f * f + j * f + k] << " "; } std::cout << std::endl; } } std::cout << std::endl; } //Compute between class scatter sb (p x f x f) of all gnome features phe(tP x f) void computeSb(float* sb, float* M, float* cM, size_t nC, std::vector nPxInCls) { float tempsbval; size_t n1; size_t n2; size_t classIndx; //class index in class mean matrix /*std::cout <<"population of computation of cpusb "<< std::endl; for (size_t i2 = 0; i2 < f; i2++) { std::cout << P[i2] << "\t"; }*/ for (size_t gnomeIndx = 0; gnomeIndx < p; gnomeIndx++) { for (size_t c = 0; c < nC; c++) { for (size_t i = 0; i < f; i++) { for (size_t j = 0; j < f; j++) { tempsbval = 0; classIndx = c * ub; n1 = P[gnomeIndx * f + i]; //actual feature index in original feature matrix n2 = P[gnomeIndx * f + j]; //actual feature index in original feature matrix // std::cout << "i: " << i << " j: " < gnome = getGnome(g); // std::vector gnomeunique; // int flag = 0; //flag will be set if gnome has duplicated band (feature) index // std::sort(gnome.begin(), gnome.end()); // 1 1 2 2 3 3 3 4 4 5 5 6 7 // std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //keep only unique copies of indices and remove duplicate copies // if (gnomeunique.size()< gnome.size()) { // flag = 1; //set flag for those if there are duplicated indices // //std::cout<<"gnome:["< gnome = getGnome(g); //get current gnome g from population matrix P std::vector gnomeunique; //array to store only unique band indicies in a genome int flag = 0; //flag will be set if gnome has duplicated band (feature) index std::sort(gnome.begin(), gnome.end()); //sort current gnome std::unique_copy(gnome.begin(), gnome.end(), std::back_inserter(gnomeunique)); //remove duplicat copies of band indices and keep only unique in a gnome if (gnomeunique.size()< gnome.size()) { flag = 1; //set flag for those if there are duplicated indices //std::cout<<"gnome:["<cpu_nPxInCls, size_t tP, size_t nC) { // call gpuInitialization(......) with all of the necessary parameters gpuIntialization(&gpuP, p, f, &gpuCM, cpuCM, nC, ub, &gpuM, cpuM, &gpu_nPxInCls, &gpuSb, &gpuSw, &gpuF, F, &gpuT, T, tP, &cpu_nPxInCls[0]); } //Computation of between class scatter and within class scatter in GPU void gpu_computeSbSw(float* cpuSb, float* cpuSw, size_t nC, size_t tP, cudaDeviceProp props, size_t gen, bool debug, std::ofstream& profilefile) { //calling function for SW and Sb computation and passing necessary arrays for computation // std::cout<<"gpu function calling"<