Commit ebd0245245fde20cb2e3cf63a19614c7b8286467
1 parent
1808c255
added cuBLAS error messages
Showing 4 changed files with 58 additions and 18 deletions
Show diff stats
stim/envi/agilent_binary.h
@@ -171,7 +171,7 @@ public: | @@ -171,7 +171,7 @@ public: | ||
171 | 171 | ||
172 | //pads to the nearest power-of-two | 172 | //pads to the nearest power-of-two |
173 | void zeropad(){ | 173 | void zeropad(){ |
174 | - size_t newZ = pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two | 174 | + size_t newZ = (size_t)pow(2, ceil(log(R[2])/log(2))); //find the nearest power-of-two |
175 | size_t n = newZ - R[2]; //calculate the number of bands to add | 175 | size_t n = newZ - R[2]; //calculate the number of bands to add |
176 | zeropad(n); //add the padding | 176 | zeropad(n); //add the padding |
177 | } | 177 | } |
stim/envi/bil.h
@@ -243,7 +243,7 @@ public: | @@ -243,7 +243,7 @@ public: | ||
243 | 243 | ||
244 | //load a frame y into a pre-allocated double-precision array | 244 | //load a frame y into a pre-allocated double-precision array |
245 | int read_plane_xzd(double* f, size_t y){ | 245 | int read_plane_xzd(double* f, size_t y){ |
246 | - size_t XB = X() * B(); | 246 | + size_t XB = X() * Z(); |
247 | T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision | 247 | T* temp = (T*) malloc(XB * sizeof(T)); //create a temporary location to store the plane at current precision |
248 | if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1 | 248 | if(!read_plane_y(temp, y)) return 1; //read the plane in its native format, if it fails return a 1 |
249 | for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double | 249 | for(size_t i = 0; i < XB; i++) f[i] = temp[i]; //convert the plane to a double |
@@ -1061,7 +1061,6 @@ public: | @@ -1061,7 +1061,6 @@ public: | ||
1061 | } | 1061 | } |
1062 | 1062 | ||
1063 | int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | 1063 | int co_matrix_cublas(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1064 | - cudaError_t cudaStat; | ||
1065 | cublasStatus_t stat; | 1064 | cublasStatus_t stat; |
1066 | cublasHandle_t handle; | 1065 | cublasHandle_t handle; |
1067 | 1066 | ||
@@ -1131,13 +1130,30 @@ public: | @@ -1131,13 +1130,30 @@ public: | ||
1131 | progress = 0; | 1130 | progress = 0; |
1132 | 1131 | ||
1133 | int dev_count; | 1132 | int dev_count; |
1134 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | 1133 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices |
1134 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1135 | cudaDeviceProp prop; | 1135 | cudaDeviceProp prop; |
1136 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | 1136 | + int best_device_id = 0; //stores the best CUDA device |
1137 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1138 | + std::cout<<"CUDA devices:"<<std::endl; | ||
1139 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1140 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | ||
1141 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1142 | + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | ||
1143 | + if(cc > best_device_cc){ | ||
1144 | + best_device_cc = cc; //if this is better than the previous device, use it | ||
1145 | + best_device_id = d; | ||
1146 | + } | ||
1147 | + } | ||
1148 | + | ||
1137 | if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | 1149 | if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator |
1150 | + std::cout<<"Using device "<<best_device_id<<std::endl; | ||
1151 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | ||
1138 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | 1152 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1139 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | 1153 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1140 | } //otherwise continue using the CPU | 1154 | } //otherwise continue using the CPU |
1155 | + | ||
1156 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | ||
1141 | 1157 | ||
1142 | //memory allocation | 1158 | //memory allocation |
1143 | unsigned long long xy = X() * Y(); | 1159 | unsigned long long xy = X() * Y(); |
stim/envi/bip.h
@@ -1047,17 +1047,31 @@ public: | @@ -1047,17 +1047,31 @@ public: | ||
1047 | bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | 1047 | bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1048 | progress = 0; | 1048 | progress = 0; |
1049 | 1049 | ||
1050 | -//#ifdef CUDA_FOUND | ||
1051 | int dev_count; | 1050 | int dev_count; |
1052 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | 1051 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices |
1052 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1053 | cudaDeviceProp prop; | 1053 | cudaDeviceProp prop; |
1054 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | 1054 | + int best_device_id = 0; //stores the best CUDA device |
1055 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1056 | + std::cout<<"CUDA devices:"<<std::endl; | ||
1057 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1058 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | ||
1059 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1060 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | ||
1061 | + if(cc > best_device_cc){ | ||
1062 | + best_device_cc = cc; //if this is better than the previous device, use it | ||
1063 | + best_device_id = d; | ||
1064 | + } | ||
1065 | + } | ||
1066 | + | ||
1055 | if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | 1067 | if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator |
1068 | + std::cout<<"Using device "<<best_device_id<<std::endl; | ||
1069 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | ||
1056 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | 1070 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1057 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | 1071 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1058 | } //otherwise continue using the CPU | 1072 | } //otherwise continue using the CPU |
1059 | -//#endif | ||
1060 | 1073 | ||
1074 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | ||
1061 | //memory allocation | 1075 | //memory allocation |
1062 | unsigned long long XY = X() * Y(); | 1076 | unsigned long long XY = X() * Y(); |
1063 | unsigned long long B = Z(); | 1077 | unsigned long long B = Z(); |
@@ -1174,20 +1188,31 @@ public: | @@ -1174,20 +1188,31 @@ public: | ||
1174 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1188 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1175 | bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | 1189 | bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ |
1176 | 1190 | ||
1177 | -//#ifdef CUDA_FOUND | ||
1178 | int dev_count; | 1191 | int dev_count; |
1179 | - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices | 1192 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices |
1193 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1180 | cudaDeviceProp prop; | 1194 | cudaDeviceProp prop; |
1181 | - cudaGetDeviceProperties(&prop, 0); //get the property of the first device | 1195 | + int best_device_id = 0; //stores the best CUDA device |
1196 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1197 | + std::cout<<"CUDA devices:"<<std::endl; | ||
1198 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1199 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | ||
1200 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1201 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | ||
1202 | + if(cc > best_device_cc){ | ||
1203 | + best_device_cc = cc; //if this is better than the previous device, use it | ||
1204 | + best_device_id = d; | ||
1205 | + } | ||
1206 | + } | ||
1207 | + | ||
1182 | if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | 1208 | if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator |
1209 | + std::cout<<"Using device "<<best_device_id<<std::endl; | ||
1210 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | ||
1183 | int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | 1211 | int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1184 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | 1212 | if(status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1185 | } //otherwise continue using the CPU | 1213 | } //otherwise continue using the CPU |
1186 | - //if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator | ||
1187 | - // return coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1188 | -//#endif | ||
1189 | - | ||
1190 | - | 1214 | + |
1215 | + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | ||
1191 | 1216 | ||
1192 | progress = 0; | 1217 | progress = 0; |
1193 | //memory allocation | 1218 | //memory allocation |
stim/envi/bsq.h
@@ -1265,7 +1265,6 @@ public: | @@ -1265,7 +1265,6 @@ public: | ||
1265 | yi > sy/2 && yi < Y() - sy/2){ | 1265 | yi > sy/2 && yi < Y() - sy/2){ |
1266 | size_t cx = xi - sx/2; //calculate the corner position for the subimage | 1266 | size_t cx = xi - sx/2; //calculate the corner position for the subimage |
1267 | size_t cy = yi - sy/2; | 1267 | size_t cy = yi - sy/2; |
1268 | - size_t cxi, cyi; | ||
1269 | for(size_t syi = 0; syi < sy; syi++){ //for each line in the subimage | 1268 | for(size_t syi = 0; syi < sy; syi++){ //for each line in the subimage |
1270 | size_t src_i = (cy + syi) * X() + cx; | 1269 | size_t src_i = (cy + syi) * X() + cx; |
1271 | //size_t dst_i = syi * (N * sx) + n * sx; | 1270 | //size_t dst_i = syi * (N * sx) + n * sx; |