implemented a no-gpu option

David Mayerich
1 parent 87d0a1d2
Showing 3 changed files with 88 additions and 82 deletions Show diff stats
stim/envi/bil.h
stim/envi/bip.h
stim/envi/envi.h
@@ -1126,34 +1126,36 @@ public:
 	/// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
 	/// @param avg is a pointer to memory of size B that stores the average spectrum
 	/// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
-	bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
+	bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
 		progress = 0;
-		int dev_count;
-		HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
-		std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
-		cudaDeviceProp prop;
-		int best_device_id = 0;													//stores the best CUDA device
-		float best_device_cc = 0.0f;												//stores the compute capability of the best device
-		std::cout<<"CUDA devices:"<<std::endl;
-		for(int d = 0; d < dev_count; d++){									//for each CUDA device
-			cudaGetDeviceProperties(&prop, d);								//get the property of the first device
-			float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
-			std::cout<<"("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
-			if(cc > best_device_cc){
-				best_device_cc = cc;										//if this is better than the previous device, use it
-				best_device_id = d;
-			}
-		}		
+		if(use_gpu){
+			int dev_count;
+			HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
+			std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
+			cudaDeviceProp prop;
+			int best_device_id = 0;													//stores the best CUDA device
+			float best_device_cc = 0.0f;												//stores the compute capability of the best device
+			std::cout<<"CUDA devices:"<<std::endl;
+			for(int d = 0; d < dev_count; d++){									//for each CUDA device
+				cudaGetDeviceProperties(&prop, d);								//get the properties of device d
+				float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
+				std::cout<<"("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
+				if(cc > best_device_cc){
+					best_device_cc = cc;										//if this is better than the previous device, use it
+					best_device_id = d;
+				}
+			}		
-		if(dev_count > 0 && prop.major != 9999){							//if the first device is not an emulator
-			std::cout<<"Using device "<<best_device_id<<std::endl;
-			HANDLE_ERROR(cudaSetDevice(best_device_id));
-			int status = co_matrix_cublas(co, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
-			if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
-		}																	//otherwise continue using the CPU
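+			if(dev_count > 0 && prop.major != 9999){							//if a device exists and the last device queried is not an emulator (NOTE: prop holds the last device enumerated, not necessarily best_device_id)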
+				std::cout<<"Using device "<<best_device_id<<std::endl;
+				HANDLE_ERROR(cudaSetDevice(best_device_id));
+				int status = co_matrix_cublas(co, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
+				if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
+			}																	//otherwise continue using the CPU
-		std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
+			std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
+		}
 		//memory allocation
 		unsigned long long xy = X() * Y();
@@ -1044,34 +1044,36 @@ public:
 	/// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
 	/// @param avg is a pointer to memory of size B that stores the average spectrum
 	/// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
-	bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
+	bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
 		progress = 0;
-		int dev_count;
-		HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
-		std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
-		cudaDeviceProp prop;
-		int best_device_id = 0;													//stores the best CUDA device
-		float best_device_cc = 0.0f;												//stores the compute capability of the best device
-		std::cout<<"CUDA devices:"<<std::endl;
-		for(int d = 0; d < dev_count; d++){									//for each CUDA device
-			cudaGetDeviceProperties(&prop, d);								//get the property of the first device
-			float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
-			std::cout<<d<<":   ("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
-			if(cc > best_device_cc){
-				best_device_cc = cc;										//if this is better than the previous device, use it
-				best_device_id = d;
-			}
-		}		
+		if(use_gpu){
+			int dev_count;
+			HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
+			std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
+			cudaDeviceProp prop;
+			int best_device_id = 0;													//stores the best CUDA device
+			float best_device_cc = 0.0f;												//stores the compute capability of the best device
+			std::cout<<"CUDA devices----"<<std::endl;
+			for(int d = 0; d < dev_count; d++){									//for each CUDA device
+				cudaGetDeviceProperties(&prop, d);								//get the properties of device d
+				float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
+				std::cout<<d<<":  ["<<prop.major<<"."<<prop.minor<<"]      "<<prop.name<<std::endl;	//display the device information
+				if(cc > best_device_cc){
+					best_device_cc = cc;										//if this is better than the previous device, use it
+					best_device_id = d;
+				}
+			}		
-		if(dev_count > 0 && prop.major != 9999){							//if the first device is not an emulator
-			std::cout<<"Using device "<<best_device_id<<std::endl;
-			HANDLE_ERROR(cudaSetDevice(best_device_id));
-			int status = co_matrix_cublas(co, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
-			if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
-		}																	//otherwise continue using the CPU
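+			if(dev_count > 0 && prop.major != 9999){							//if a device exists and the last device queried is not an emulator (NOTE: prop holds the last device enumerated, not necessarily best_device_id)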
+				std::cout<<"Using device "<<best_device_id<<std::endl;
+				HANDLE_ERROR(cudaSetDevice(best_device_id));
+				int status = co_matrix_cublas(co, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
+				if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
+			}																	//otherwise continue using the CPU
-		std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
+			std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
+		}
 		//memory allocation
 		unsigned long long XY = X() * Y();
 		unsigned long long B = Z();
@@ -1177,7 +1179,7 @@ public:
 			}
 		}
-		return true;
+		return 0;
 	}
 //#endif
@@ -1186,33 +1188,35 @@ public:
 	/// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
 	/// @param avg is a pointer to memory of size B that stores the average spectrum
 	/// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
-	bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
-
-		int dev_count;
-		HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
-		std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
-		cudaDeviceProp prop;
-		int best_device_id = 0;													//stores the best CUDA device
-		float best_device_cc = 0.0f;												//stores the compute capability of the best device
-		std::cout<<"CUDA devices:"<<std::endl;
-		for(int d = 0; d < dev_count; d++){									//for each CUDA device
-			cudaGetDeviceProperties(&prop, d);								//get the property of the first device
-			float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
-			std::cout<<d<<":   ("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
-			if(cc > best_device_cc){
-				best_device_cc = cc;										//if this is better than the previous device, use it
-				best_device_id = d;
-			}
-		}		
+	bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
+
+		if(use_gpu){
+			int dev_count;
+			HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
+			std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
+			cudaDeviceProp prop;
+			int best_device_id = 0;													//stores the best CUDA device
+			float best_device_cc = 0.0f;												//stores the compute capability of the best device
+			std::cout<<"CUDA devices:"<<std::endl;
+			for(int d = 0; d < dev_count; d++){									//for each CUDA device
+				cudaGetDeviceProperties(&prop, d);								//get the properties of device d
+				float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
+				std::cout<<d<<":   ("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
+				if(cc > best_device_cc){
+					best_device_cc = cc;										//if this is better than the previous device, use it
+					best_device_id = d;
+				}
+			}		
-		if(dev_count > 0 && prop.major != 9999){							//if the first device is not an emulator
-			std::cout<<"Using device "<<best_device_id<<std::endl;
-			HANDLE_ERROR(cudaSetDevice(best_device_id));
-			int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
-			if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
-		}																	//otherwise continue using the CPU
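+			if(dev_count > 0 && prop.major != 9999){							//if a device exists and the last device queried is not an emulator (NOTE: prop holds the last device enumerated, not necessarily best_device_id)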
+				std::cout<<"Using device "<<best_device_id<<std::endl;
+				HANDLE_ERROR(cudaSetDevice(best_device_id));
+				int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
+				if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
+			}																	//otherwise continue using the CPU
-		std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
+			std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
+		}
 		progress = 0;
 		//memory allocation
@@ -1441,16 +1441,16 @@ public:
 	/// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
 	/// @param avg is a pointer to memory of size B that stores the average spectrum
 	/// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
-	bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){
+	bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){
 		if (header.interleave == envi_header::BSQ){
 			std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
 			exit(1);
 		}
 		else if (header.interleave == envi_header::BIL){
 			if (header.data_type == envi_header::float32)
-				return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
+				return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
 			else if (header.data_type == envi_header::float64)
-				return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
+				return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
 			else{
 				std::cout << "ERROR: unidentified data type" << std::endl;
 				exit(1);
@@ -1458,9 +1458,9 @@ public:
 		}
 		else if (header.interleave == envi_header::BIP){
 			if (header.data_type == envi_header::float32)
-				return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
+				return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
 			else if (header.data_type == envi_header::float64)
-				return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
+				return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
 			else{
 				std::cout << "ERROR: unidentified data type" << std::endl;
 				exit(1);
@@ -1474,7 +1474,7 @@ public:
 	/// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
 	/// @param avg is a pointer to memory of size B that stores the average spectrum
 	/// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
-	bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){
+	bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){
 		if (header.interleave == envi_header::BSQ){
 			std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
 			exit(1);
@@ -1488,9 +1488,9 @@ public:
 		else if (header.interleave == envi_header::BIP){
 			if (header.data_type == envi_header::float32)
-				return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
+				return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
 			else if (header.data_type == envi_header::float64)
-				return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
+				return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
 			else{
 				std::cout << "ERROR: unidentified data type" << std::endl;
 				exit(1);