From 334547d8a4ebc1de16f52a6d48451d11de0781b1 Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 22 Aug 2016 16:21:28 -0500 Subject: [PATCH] changes for mnf: adding covariance of noise function in cpu and gpu also a gpu version of project function --- stim/envi/bip.h | 239 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- stim/envi/envi.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+), 2 deletions(-) diff --git a/stim/envi/bip.h b/stim/envi/bip.h index afe4a90..2299003 100644 --- a/stim/envi/bip.h +++ b/stim/envi/bip.h @@ -373,7 +373,7 @@ public: for(size_t xy = 0; xy < XY; xy++){ //for each pixel memset(spec, 0, Bb); //set the spectrum to zero if(mask == NULL || mask[xy]){ //if the pixel is masked - len = 0; //initialize the + len = 0; //initialize the file.read((char*)spec, Bb); //read a spectrum for(size_t b = 0; b < B; b++) //for each band len += spec[b]*spec[b]; //add the square of the spectral band @@ -385,7 +385,7 @@ public: file.seekg(Bb, std::ios::cur); //otherwise skip a spectrum target.write((char*)spec, Bb); //output the normalized spectrum if(PROGRESS) progress = (double)(xy + 1) / (double)XY * 100; //update the progress - } + } } @@ -1088,6 +1088,233 @@ public: return true; } + + #ifdef CUDA_FOUND + /// Calculate the covariance matrix of Noise for masked pixels using cuBLAS + /// Note that cuBLAS only supports integer-sized arrays, so there may be issues with large spectra + bool coNoise_matrix_cublas(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ + + cudaError_t cudaStat; + cublasStatus_t stat; + cublasHandle_t handle; + + progress = 0; //initialize the progress to zero (0) + unsigned long long XY = X() * Y(); //calculate the number of elements in a band image + unsigned long long B = Z(); //calculate the number of spectral elements + + double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file + double* s_dev; //declare a device pointer that will store the spectrum on the GPU + + double* s2_dev; // device pointer on the GPU + cudaStat = cudaMalloc(&s2_dev, B * sizeof(double)); // allocate space on the CUDA device + cudaStat = cudaMemset(s2_dev, 0, B * sizeof(double)); // initialize s2_dev to zero (0) + + double* A_dev; //declare a device pointer that will store the covariance matrix on the GPU + double* avg_dev; //declare a device pointer that will store the average spectrum + cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum + cudaStat = cudaMalloc(&A_dev, B * B * sizeof(double)); //allocate space on the CUDA device for the covariance matrix + cudaStat = cudaMemset(A_dev, 0, B * B * sizeof(double)); //initialize the covariance matrix to zero (0) + cudaStat = cudaMalloc(&avg_dev, B * sizeof(double)); //allocate space on the CUDA device for the average spectrum + stat = cublasSetVector((int)B, sizeof(double), avg, 1, avg_dev, 1); //copy the average spectrum to the CUDA device + + double ger_alpha = 1.0/(double)XY; //scale the outer product by the inverse of the number of samples (mean outer product) + double axpy_alpha = -1; //multiplication factor for the average spectrum (in order to perform a subtraction) + + stat = cublasCreate(&handle); //create a cuBLAS instance + if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid + printf ("CUBLAS initialization failed\n"); + return EXIT_FAILURE; + } + for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel + if (mask == NULL || mask[xy] != 0){ + pixeld(s, xy); //retreive the spectrum at the current xy pixel location + + stat = cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1); //copy the spectrum from the host to the device + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, avg_dev, 1, s_dev, 1); //subtract the average spectrum + + cudaMemcpy(s2_dev, s_dev + 1 , (B-1) * sizeof(double), cudaMemcpyDeviceToDevice); //copy B-1 elements from shifted source data (s_dev) to device pointer (s2_dev ) + stat = cublasDaxpy(handle, (int)B, &axpy_alpha, s2_dev, 1, s_dev, 1); //Minimum/Maximum Autocorrelation Factors (MAF) method : subtranct each pixel from adjacent pixel (z direction is choosed to do so , which is almost the same as x or y direction or even average of them ) + + + stat = cublasDsyr(handle, CUBLAS_FILL_MODE_UPPER, (int)B, &ger_alpha, s_dev, 1, A_dev, (int)B); //calculate the covariance matrix (symmetric outer product) + } + if(PROGRESS) progress = (double)(xy+1) / XY * 100; //record the current progress + + } + + cublasGetMatrix((int)B, (int)B, sizeof(double), A_dev, (int)B, coN, (int)B); //copy the result from the GPU to the CPU + + cudaFree(A_dev); //clean up allocated device memory + cudaFree(s_dev); + cudaFree(s2_dev); + cudaFree(avg_dev); + + for(unsigned long long i = 0; i < B; i++){ //copy the upper triangular portion to the lower triangular portion + for(unsigned long long j = i+1; j < B; j++){ + coN[B * i + j] = coN[B * j + i]; + } + } + + return true; + } +#endif + + /// Calculate the covariance of noise matrix for all masked pixels in the image with 64-bit floating point precision. + + /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix + /// @param avg is a pointer to memory of size B that stores the average spectrum + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ + +#ifdef CUDA_FOUND + int dev_count; + cudaGetDeviceCount(&dev_count); //get the number of CUDA devices + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, 0); //get the property of the first device + if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator + return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix +#endif + + + + progress = 0; + //memory allocation + unsigned long long XY = X() * Y(); + unsigned long long B = Z(); + T* temp = (T*)malloc(sizeof(T) * B); + + unsigned long long count = nnz(mask); //count the number of masked pixels + + //initialize covariance matrix of noise + memset(coN, 0, B * B * sizeof(double)); + + //calculate covariance matrix + double* coN_half = (double*) malloc(B * B * sizeof(double)); //allocate space for a higher-precision intermediate matrix + double* temp_precise = (double*) malloc(B * sizeof(double)); + memset(coN_half, 0, B * B * sizeof(double)); //initialize the high-precision matrix with zeros + unsigned long long idx; //stores i*B to speed indexing + for (unsigned long long xy = 0; xy < XY; xy++){ + if (mask == NULL || mask[xy] != 0){ + pixel(temp, xy); //retreive the spectrum at the current xy pixel location + for(unsigned long long b = 0; b < B; b++) //subtract the mean from this spectrum and increase the precision + temp_precise[b] = (double)temp[b] - (double)avg[b]; + + for(unsigned long long b2 = 0; b2 < B-1; b2++) //Minimum/Maximum Autocorrelation Factors (MAF) method : subtranct each pixel from adjacent pixel (z direction is choosed to do so , which is almost the same as x or y direction or even average of them ) + temp_precise[b2] -= temp_precise[b2+1]; + + idx = 0; + for (unsigned long long b0 = 0; b0 < B; b0++){ //for each band + for (unsigned long long b1 = b0; b1 < B; b1++) + coN_half[idx++] += temp_precise[b0] * temp_precise[b1]; + } + } + if(PROGRESS) progress = (double)(xy+1) / XY * 100; + } + idx = 0; + for (unsigned long long i = 0; i < B; i++){ //copy the precision matrix to both halves of the output matrix + for (unsigned long long j = i; j < B; j++){ + coN[j * B + i] = coN[i * B + j] = coN_half[idx++] / (double) count; + } + } + + free(temp); + free(temp_precise); + return true; + } + + #ifdef CUDA_FOUND + /// Project the spectra onto a set of basis functions + /// @param outfile is the name of the new binary output file that will be created + /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering) + /// @param basis a set of basis vectors that the data set will be projected onto (after centering) + /// @param M is the number of basis vectors + /// @param mask is a character mask used to limit processing to valid pixels + bool project_cublas(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){ + + cudaError_t cudaStat; + cublasStatus_t stat; + cublasHandle_t handle; + + std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file + //std::string headername = outfile + ".hdr"; //the header file name + + progress = 0; //initialize the progress to zero (0) + unsigned long long XY = X() * Y(); //calculate the number of elements in a band image + unsigned long long B = Z(); //calculate the number of spectral elements + + double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file + double* s_dev; //declare a device pointer that will store the spectrum on the GPU + cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum + + + double* basis_dev; // device pointer on the GPU + cudaStat = cudaMalloc(&basis_dev, M * B * sizeof(double)); // allocate space on the CUDA device + cudaStat = cudaMemset(basis_dev, 0, M * B * sizeof(double)); // initialize basis_dev to zero (0) + + + /// transposing basis matrix (because cuBLAS is column-major) + double *basis_Transposed = (double*)malloc(M * B * sizeof(double)); + memset(basis_Transposed, 0, M * B * sizeof(double)); + for (int i = 0; i(temp), sizeof(T) * M); //write the projected vector + if(PROGRESS) progress = (double)(xy+1) / XY * 100; //record the current progress + + } + + //clean up allocated device memory + cudaFree(A_dev); + cudaFree(s_dev); + cudaFree(basis_dev); + cudaFree(center_dev); + free(A); + free(s); + free(temp); + target.close(); //close the output file + + return true; + } +#endif + /// Project the spectra onto a set of basis functions /// @param outfile is the name of the new binary output file that will be created /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering) @@ -1096,6 +1323,14 @@ public: /// @param mask is a character mask used to limit processing to valid pixels bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){ +#ifdef CUDA_FOUND + int dev_count; + cudaGetDeviceCount(&dev_count); //get the number of CUDA devices + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, 0); //get the property of the first device + if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator + return project_cublas(outfile,center,basis,M,mask,PROGRESS); //use cuBLAS to calculate the covariance matrix +#endif std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file //std::string headername = outfile + ".hdr"; //the header file name diff --git a/stim/envi/envi.h b/stim/envi/envi.h index 994bc64..9c8506a 100644 --- a/stim/envi/envi.h +++ b/stim/envi/envi.h @@ -1403,6 +1403,35 @@ public: return false; } + /// Calculate the covariance of noise matrix for all masked pixels in the image. + + /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix + /// @param avg is a pointer to memory of size B that stores the average spectrum + /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){ + if (header.interleave == envi_header::BSQ){ + std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); + else if (header.data_type == envi_header::float64) + return ((bip*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); + else{ + std::cout << "ERROR: unidentified data type" << std::endl; + exit(1); + } + } + return false; + } /// Crop a region of the image and save it to a new file. -- libgit2 0.21.4