added cuBLAS error messages

David Mayerich
1 parent 1808c255
Showing 4 changed files with 58 additions and 18 deletions Show diff stats
stim/envi/agilent_binary.h
stim/envi/bil.h
stim/envi/bip.h
stim/envi/bsq.h
@@ -171,7 +171,7 @@ public:
 	//Pads the band dimension up to a power of two.
 	//The target size is the smallest power of two >= R[2]; the shortfall is handed to zeropad(n).
 	//NOTE(review): the target is computed in floating point (log/ceil/pow) - confirm R[2] > 0 is guaranteed and that rounding is acceptable for large sizes.
 	void zeropad(){
-		size_t newZ = pow(2, ceil(log(R[2])/log(2)));			//find the nearest power-of-two
+		size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2)));			//round R[2] up to a power of two; the cast makes the double -> size_t conversion explicit
 		size_t n = newZ - R[2];									//calculate the number of bands to add
 		zeropad(n);												//add the padding
 	}
@@ -243,7 +243,7 @@ public:
 	//load a frame y into a pre-allocated double-precision array
 	int read_plane_xzd(double* f, size_t y){		
-		size_t XB = X() * B();
+		size_t XB = X() * Z();
 		T* temp = (T*) malloc(XB * sizeof(T));			//create a temporary location to store the plane at current precision
 		if(!read_plane_y(temp, y)) return 1;			//read the plane in its native format; on failure return 1 (NOTE(review): temp from malloc above is not freed on this path)
 		for(size_t i = 0; i < XB; i++) f[i] = temp[i];	//convert the plane to a double
@@ -1061,7 +1061,6 @@ public:
 	}
 	int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
-		cudaError_t cudaStat;
 		cublasStatus_t stat;
 		cublasHandle_t handle;
@@ -1131,13 +1130,30 @@ public:
 		progress = 0;
 		int dev_count;
-		cudaGetDeviceCount(&dev_count);									//get the number of CUDA devices
+		HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
+		std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
 		cudaDeviceProp prop;
-		cudaGetDeviceProperties(&prop, 0);								//get the property of the first device
+		int best_device_id = 0;													//stores the best CUDA device
+		float best_device_cc = 0.0f;												//stores the compute capability of the best device
+		std::cout<<"CUDA devices:"<<std::endl;
+		for(int d = 0; d < dev_count; d++){									//for each CUDA device
+			cudaGetDeviceProperties(&prop, d);								//get the properties of device d
+			float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
+			std::cout<<"("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
+			if(cc > best_device_cc){
+				best_device_cc = cc;										//if this is better than the previous device, use it
+				best_device_id = d;
+			}
+		}		
+		
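 		if(dev_count > 0 && prop.major != 9999){							//if a device exists and prop is not an emulator (NOTE(review): prop holds the last device queried by the loop, not necessarily best_device_id)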
+			std::cout<<"Using device "<<best_device_id<<std::endl;
+			HANDLE_ERROR(cudaSetDevice(best_device_id));
 			int status = co_matrix_cublas(co, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
 			if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
 		}																	//otherwise continue using the CPU
+		
+		std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
 		//memory allocation
 		unsigned long long xy = X() * Y();
@@ -1047,17 +1047,31 @@ public:
 	bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
 		progress = 0;
-//#ifdef CUDA_FOUND
 		int dev_count;
-		cudaGetDeviceCount(&dev_count);									//get the number of CUDA devices
+		HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
+		std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
 		cudaDeviceProp prop;
-		cudaGetDeviceProperties(&prop, 0);								//get the property of the first device
+		int best_device_id = 0;													//stores the best CUDA device
+		float best_device_cc = 0.0f;												//stores the compute capability of the best device
+		std::cout<<"CUDA devices:"<<std::endl;
+		for(int d = 0; d < dev_count; d++){									//for each CUDA device
+			cudaGetDeviceProperties(&prop, d);								//get the properties of device d
+			float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
+			std::cout<<d<<":   ("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
+			if(cc > best_device_cc){
+				best_device_cc = cc;										//if this is better than the previous device, use it
+				best_device_id = d;
+			}
+		}		
+		
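 		if(dev_count > 0 && prop.major != 9999){							//if a device exists and prop is not an emulator (NOTE(review): prop holds the last device queried by the loop, not necessarily best_device_id)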
+			std::cout<<"Using device "<<best_device_id<<std::endl;
+			HANDLE_ERROR(cudaSetDevice(best_device_id));
 			int status = co_matrix_cublas(co, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
 			if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
 		}																	//otherwise continue using the CPU
-//#endif
+		std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
 		//memory allocation
 		unsigned long long XY = X() * Y();
 		unsigned long long B = Z();
@@ -1174,20 +1188,31 @@ public:
 	/// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
 	bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
-//#ifdef CUDA_FOUND
 		int dev_count;
-		cudaGetDeviceCount(&dev_count);									//get the number of CUDA devices
+		HANDLE_ERROR(cudaGetDeviceCount(&dev_count));						//get the number of CUDA devices
+		std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl;		//output the number of CUDA devices
 		cudaDeviceProp prop;
-		cudaGetDeviceProperties(&prop, 0);								//get the property of the first device
+		int best_device_id = 0;													//stores the best CUDA device
+		float best_device_cc = 0.0f;												//stores the compute capability of the best device
+		std::cout<<"CUDA devices:"<<std::endl;
+		for(int d = 0; d < dev_count; d++){									//for each CUDA device
+			cudaGetDeviceProperties(&prop, d);								//get the properties of device d
+			float cc = prop.major + prop.minor / 10.0f;						//calculate the compute capability
+			std::cout<<d<<":   ("<<prop.major<<"."<<prop.minor<<")      "<<prop.name<<std::endl;	//display the device information
+			if(cc > best_device_cc){
+				best_device_cc = cc;										//if this is better than the previous device, use it
+				best_device_id = d;
+			}
+		}		
+		
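 		if(dev_count > 0 && prop.major != 9999){							//if a device exists and prop is not an emulator (NOTE(review): prop holds the last device queried by the loop, not necessarily best_device_id)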
+			std::cout<<"Using device "<<best_device_id<<std::endl;
+			HANDLE_ERROR(cudaSetDevice(best_device_id));
 			int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
 			if(status == 0) return true;									//if the cuBLAS function returned correctly, we're done
 		}																	//otherwise continue using the CPU
-		//if(dev_count > 0 && prop.major != 9999)							//if the first device is not an emulator
-		//	return coNoise_matrix_cublas(coN, avg, mask, PROGRESS);			//use cuBLAS to calculate the covariance matrix
-//#endif
-
-
+		
+		std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
 		progress = 0;
 		//memory allocation
@@ -1265,7 +1265,6 @@ public:
 						yi > sy/2 && yi < Y() - sy/2){
 						size_t cx = xi - sx/2;					//calculate the corner position for the subimage
 						size_t cy = yi - sy/2;
-						size_t cxi, cyi;
 						for(size_t syi = 0; syi < sy; syi++){					//for each line in the subimage
 							size_t src_i = (cy + syi) * X() + cx;
 							//size_t dst_i = syi * (N * sx) + n * sx;