Commit ebd0245245fde20cb2e3cf63a19614c7b8286467
1 parent
1808c255
added cuBLAS error messages
Showing
4 changed files
with
58 additions
and
18 deletions
Show diff stats
stim/envi/agilent_binary.h
... | ... | @@ -171,7 +171,7 @@ public: |
171 | 171 | |
172 | 172 | //pads to the nearest power-of-two |
173 | 173 | void zeropad(){ |
174 | - size_t newZ = pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two | |
174 | + size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two | |
175 | 175 | size_t n = newZ - R[2]; //calculate the number of bands to add |
176 | 176 | zeropad(n); //add the padding |
177 | 177 | } | ... | ... |
stim/envi/bil.h
... | ... | @@ -243,7 +243,7 @@ public: |
243 | 243 | |
244 | 244 | //load a frame y into a pre-allocated double-precision array |
245 | 245 | int read_plane_xzd(double* f, size_t y){ |
246 | - size_t XB = X() * B(); | |
246 | + size_t XB = X() * Z(); | |
247 | 247 | T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision |
248 | 248 | if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1 |
249 | 249 | for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double |
... | ... | @@ -1061,7 +1061,6 @@ public: |
1061 | 1061 | } |
1062 | 1062 | |
1063 | 1063 | int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1064 | - cudaError_t cudaStat; | |
1065 | 1064 | cublasStatus_t stat; |
1066 | 1065 | cublasHandle_t handle; |
1067 | 1066 | |
... | ... | @@ -1131,13 +1130,30 @@ public: |
1131 | 1130 | progress = 0; |
1132 | 1131 | |
1133 | 1132 | int dev_count; |
1134 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | |
1133 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1134 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1135 | 1135 | cudaDeviceProp prop; |
1136 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | |
1136 | + int best_device_id = 0; //stores the best CUDA device | |
1137 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1138 | + std::cout<<"CUDA devices:"<<std::endl; | |
1139 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1140 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1141 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1142 | + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1143 | + if(cc > best_device_cc){ | |
1144 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1145 | + best_device_id = d; | |
1146 | + } | |
1147 | + } | |
1148 | + | |
1137 | 1149 | if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator |
1150 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1151 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1138 | 1152 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1139 | 1153 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1140 | 1154 | } //otherwise continue using the CPU |
1155 | + | |
1156 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1141 | 1157 | |
1142 | 1158 | //memory allocation |
1143 | 1159 | unsigned long long xy = X() * Y(); | ... | ... |
stim/envi/bip.h
... | ... | @@ -1047,17 +1047,31 @@ public: |
1047 | 1047 | bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1048 | 1048 | progress = 0; |
1049 | 1049 | |
1050 | -//#ifdef CUDA_FOUND | |
1051 | 1050 | int dev_count; |
1052 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | |
1051 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1052 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1053 | 1053 | cudaDeviceProp prop; |
1054 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | |
1054 | + int best_device_id = 0; //stores the best CUDA device | |
1055 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1056 | + std::cout<<"CUDA devices:"<<std::endl; | |
1057 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1058 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1059 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1060 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1061 | + if(cc > best_device_cc){ | |
1062 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1063 | + best_device_id = d; | |
1064 | + } | |
1065 | + } | |
1066 | + | |
1055 | 1067 | if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator |
1068 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1069 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1056 | 1070 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1057 | 1071 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1058 | 1072 | } //otherwise continue using the CPU |
1059 | -//#endif | |
1060 | 1073 | |
1074 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1061 | 1075 | //memory allocation |
1062 | 1076 | unsigned long long XY = X() * Y(); |
1063 | 1077 | unsigned long long B = Z(); |
... | ... | @@ -1174,20 +1188,31 @@ public: |
1174 | 1188 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1175 | 1189 | bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1176 | 1190 | |
1177 | -//#ifdef CUDA_FOUND | |
1178 | 1191 | int dev_count; |
1179 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | |
1192 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1193 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1180 | 1194 | cudaDeviceProp prop; |
1181 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | |
1195 | + int best_device_id = 0; //stores the best CUDA device | |
1196 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1197 | + std::cout<<"CUDA devices:"<<std::endl; | |
1198 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1199 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1200 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1201 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1202 | + if(cc > best_device_cc){ | |
1203 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1204 | + best_device_id = d; | |
1205 | + } | |
1206 | + } | |
1207 | + | |
1182 | 1208 | if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator |
1209 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1210 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1183 | 1211 | int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1184 | 1212 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1185 | 1213 | } //otherwise continue using the CPU |
1186 | - //if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator | |
1187 | - // return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1188 | -//#endif | |
1189 | - | |
1190 | - | |
1214 | + | |
1215 | + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | |
1191 | 1216 | |
1192 | 1217 | progress = 0; |
1193 | 1218 | //memory allocation | ... | ... |
stim/envi/bsq.h
... | ... | @@ -1265,7 +1265,6 @@ public: |
1265 | 1265 | yi > sy/2 && yi < Y() - sy/2){ |
1266 | 1266 | size_t cx = xi - sx/2; //calculate the corner position for the subimage |
1267 | 1267 | size_t cy = yi - sy/2; |
1268 | - size_t cxi, cyi; | |
1269 | 1268 | for(size_t syi = 0; syi < sy; syi++){ //for each line in the subimage |
1270 | 1269 | size_t src_i = (cy + syi) * X() + cx; |
1271 | 1270 | //size_t dst_i = syi * (N * sx) + n * sx; | ... | ... |