Commit ebd0245245fde20cb2e3cf63a19614c7b8286467

Authored by David Mayerich
1 parent 1808c255

added cuBLAS error messages

stim/envi/agilent_binary.h
... ... @@ -171,7 +171,7 @@ public:
171 171  
172 172 //pads to the nearest power-of-two
173 173 void zeropad(){
174   - size_t newZ = pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two
  174 + size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two
175 175 size_t n = newZ - R[2]; //calculate the number of bands to add
176 176 zeropad(n); //add the padding
177 177 }
... ...
stim/envi/bil.h
... ... @@ -243,7 +243,7 @@ public:
243 243  
244 244 //load a frame y into a pre-allocated double-precision array
245 245 int read_plane_xzd(double* f, size_t y){
246   - size_t XB = X() * B();
  246 + size_t XB = X() * Z();
247 247 T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision
248 248 if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1
249 249 for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double
... ... @@ -1061,7 +1061,6 @@ public:
1061 1061 }
1062 1062  
1063 1063 int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
1064   - cudaError_t cudaStat;
1065 1064 cublasStatus_t stat;
1066 1065 cublasHandle_t handle;
1067 1066  
... ... @@ -1131,13 +1130,30 @@ public:
1131 1130 progress = 0;
1132 1131  
1133 1132 int dev_count;
1134   - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1133 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1134 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1135 1135 cudaDeviceProp prop;
1136   - cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1136 + int best_device_id = 0; //stores the best CUDA device
  1137 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1138 + std::cout<<"CUDA devices:"<<std::endl;
  1139 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1140 + cudaGetDeviceProperties(&prop, d); //get the properties of device d
  1141 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1142 + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1143 + if(cc > best_device_cc){
  1144 + best_device_cc = cc; //if this is better than the previous device, use it
  1145 + best_device_id = d;
  1146 + }
  1147 + }
  1148 +
1137 1149 if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator
  1150 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1151 + HANDLE_ERROR(cudaSetDevice(best_device_id));
1138 1152 int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1139 1153 if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1140 1154 } //otherwise continue using the CPU
  1155 +
  1156 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
1141 1157  
1142 1158 //memory allocation
1143 1159 unsigned long long xy = X() * Y();
... ...
stim/envi/bip.h
... ... @@ -1047,17 +1047,31 @@ public:
1047 1047 bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
1048 1048 progress = 0;
1049 1049  
1050   -//#ifdef CUDA_FOUND
1051 1050 int dev_count;
1052   - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1051 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1052 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1053 1053 cudaDeviceProp prop;
1054   - cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1054 + int best_device_id = 0; //stores the best CUDA device
  1055 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1056 + std::cout<<"CUDA devices:"<<std::endl;
  1057 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1058 + cudaGetDeviceProperties(&prop, d); //get the properties of device d
  1059 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1060 + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1061 + if(cc > best_device_cc){
  1062 + best_device_cc = cc; //if this is better than the previous device, use it
  1063 + best_device_id = d;
  1064 + }
  1065 + }
  1066 +
1055 1067 if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator
  1068 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1069 + HANDLE_ERROR(cudaSetDevice(best_device_id));
1056 1070 int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1057 1071 if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1058 1072 } //otherwise continue using the CPU
1059   -//#endif
1060 1073  
  1074 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
1061 1075 //memory allocation
1062 1076 unsigned long long XY = X() * Y();
1063 1077 unsigned long long B = Z();
... ... @@ -1174,20 +1188,31 @@ public:
1174 1188 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1175 1189 bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
1176 1190  
1177   -//#ifdef CUDA_FOUND
1178 1191 int dev_count;
1179   - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
  1192 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1193 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1180 1194 cudaDeviceProp prop;
1181   - cudaGetDeviceProperties(&prop, 0); //get the property of the first device
  1195 + int best_device_id = 0; //stores the best CUDA device
  1196 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1197 + std::cout<<"CUDA devices:"<<std::endl;
  1198 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1199 + cudaGetDeviceProperties(&prop, d); //get the properties of device d
  1200 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1201 + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1202 + if(cc > best_device_cc){
  1203 + best_device_cc = cc; //if this is better than the previous device, use it
  1204 + best_device_id = d;
  1205 + }
  1206 + }
  1207 +
1182 1208 if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator
  1209 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1210 + HANDLE_ERROR(cudaSetDevice(best_device_id));
1183 1211 int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1184 1212 if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1185 1213 } //otherwise continue using the CPU
1186   - //if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
1187   - // return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1188   -//#endif
1189   -
1190   -
  1214 +
  1215 + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
1191 1216  
1192 1217 progress = 0;
1193 1218 //memory allocation
... ...
stim/envi/bsq.h
... ... @@ -1265,7 +1265,6 @@ public:
1265 1265 yi > sy/2 && yi < Y() - sy/2){
1266 1266 size_t cx = xi - sx/2; //calculate the corner position for the subimage
1267 1267 size_t cy = yi - sy/2;
1268   - size_t cxi, cyi;
1269 1268 for(size_t syi = 0; syi < sy; syi++){ //for each line in the subimage
1270 1269 size_t src_i = (cy + syi) * X() + cx;
1271 1270 //size_t dst_i = syi * (N * sx) + n * sx;
... ...