Commit dc99699ec99be17c0d8ff6d09a15b86522d28732
1 parent
87d0a1d2
implemented a no-gpu option
Showing
3 changed files
with
88 additions
and
82 deletions
Show diff stats
stim/envi/bil.h
... | ... | @@ -1126,34 +1126,36 @@ public: |
1126 | 1126 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1127 | 1127 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1128 | 1128 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1129 | - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1129 | + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ | |
1130 | 1130 | progress = 0; |
1131 | 1131 | |
1132 | - int dev_count; | |
1133 | - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1134 | - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1135 | - cudaDeviceProp prop; | |
1136 | - int best_device_id = 0; //stores the best CUDA device | |
1137 | - float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1138 | - std::cout<<"CUDA devices:"<<std::endl; | |
1139 | - for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1140 | - cudaGetDeviceProperties(&prop, d); //get the property of the first device | |
1141 | - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1142 | - std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1143 | - if(cc > best_device_cc){ | |
1144 | - best_device_cc = cc; //if this is better than the previous device, use it | |
1145 | - best_device_id = d; | |
1146 | - } | |
1147 | - } | |
1132 | + if(use_gpu){ | |
1133 | + int dev_count; | |
1134 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1135 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1136 | + cudaDeviceProp prop; | |
1137 | + int best_device_id = 0; //stores the best CUDA device | |
1138 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1139 | + std::cout<<"CUDA devices:"<<std::endl; | |
1140 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1141 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1142 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1143 | + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1144 | + if(cc > best_device_cc){ | |
1145 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1146 | + best_device_id = d; | |
1147 | + } | |
1148 | + } | |
1148 | 1149 | |
1149 | - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | |
1150 | - std::cout<<"Using device "<<best_device_id<<std::endl; | |
1151 | - HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1152 | - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1153 | - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1154 | - } //otherwise continue using the CPU | |
1150 | + if(dev_count > 0 && prop.major != 9999){ //if the last device queried is not an emulator | |
1151 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1152 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1153 | + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1154 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1155 | + } //otherwise continue using the CPU | |
1155 | 1156 | |
1156 | - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1157 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1158 | + } | |
1157 | 1159 | |
1158 | 1160 | //memory allocation |
1159 | 1161 | unsigned long long xy = X() * Y(); | ... | ... |
stim/envi/bip.h
... | ... | @@ -1044,34 +1044,36 @@ public: |
1044 | 1044 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1045 | 1045 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1046 | 1046 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1047 | - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1047 | + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ | |
1048 | 1048 | progress = 0; |
1049 | 1049 | |
1050 | - int dev_count; | |
1051 | - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1052 | - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1053 | - cudaDeviceProp prop; | |
1054 | - int best_device_id = 0; //stores the best CUDA device | |
1055 | - float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1056 | - std::cout<<"CUDA devices:"<<std::endl; | |
1057 | - for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1058 | - cudaGetDeviceProperties(&prop, d); //get the property of the first device | |
1059 | - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1060 | - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1061 | - if(cc > best_device_cc){ | |
1062 | - best_device_cc = cc; //if this is better than the previous device, use it | |
1063 | - best_device_id = d; | |
1064 | - } | |
1065 | - } | |
1050 | + if(use_gpu){ | |
1051 | + int dev_count; | |
1052 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1053 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1054 | + cudaDeviceProp prop; | |
1055 | + int best_device_id = 0; //stores the best CUDA device | |
1056 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1057 | + std::cout<<"CUDA devices----"<<std::endl; | |
1058 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1059 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1060 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1061 | + std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information | |
1062 | + if(cc > best_device_cc){ | |
1063 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1064 | + best_device_id = d; | |
1065 | + } | |
1066 | + } | |
1066 | 1067 | |
1067 | - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | |
1068 | - std::cout<<"Using device "<<best_device_id<<std::endl; | |
1069 | - HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1070 | - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1071 | - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1072 | - } //otherwise continue using the CPU | |
1068 | + if(dev_count > 0 && prop.major != 9999){ //if the last device queried is not an emulator | |
1069 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1070 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1071 | + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1072 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1073 | + } //otherwise continue using the CPU | |
1073 | 1074 | |
1074 | - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1075 | + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | |
1076 | + } | |
1075 | 1077 | //memory allocation |
1076 | 1078 | unsigned long long XY = X() * Y(); |
1077 | 1079 | unsigned long long B = Z(); |
... | ... | @@ -1177,7 +1179,7 @@ public: |
1177 | 1179 | } |
1178 | 1180 | } |
1179 | 1181 | |
1180 | - return true; | |
1182 | + return 0; | |
1181 | 1183 | } |
1182 | 1184 | //#endif |
1183 | 1185 | |
... | ... | @@ -1186,33 +1188,35 @@ public: |
1186 | 1188 | /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1187 | 1189 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1188 | 1190 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1189 | - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){ | |
1190 | - | |
1191 | - int dev_count; | |
1192 | - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1193 | - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1194 | - cudaDeviceProp prop; | |
1195 | - int best_device_id = 0; //stores the best CUDA device | |
1196 | - float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1197 | - std::cout<<"CUDA devices:"<<std::endl; | |
1198 | - for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1199 | - cudaGetDeviceProperties(&prop, d); //get the property of the first device | |
1200 | - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1201 | - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1202 | - if(cc > best_device_cc){ | |
1203 | - best_device_cc = cc; //if this is better than the previous device, use it | |
1204 | - best_device_id = d; | |
1205 | - } | |
1206 | - } | |
1191 | + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){ | |
1192 | + | |
1193 | + if(use_gpu){ | |
1194 | + int dev_count; | |
1195 | + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | |
1196 | + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | |
1197 | + cudaDeviceProp prop; | |
1198 | + int best_device_id = 0; //stores the best CUDA device | |
1199 | + float best_device_cc = 0.0f; //stores the compute capability of the best device | |
1200 | + std::cout<<"CUDA devices:"<<std::endl; | |
1201 | + for(int d = 0; d < dev_count; d++){ //for each CUDA device | |
1202 | + cudaGetDeviceProperties(&prop, d); //get the properties of device d | |
1203 | + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | |
1204 | + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | |
1205 | + if(cc > best_device_cc){ | |
1206 | + best_device_cc = cc; //if this is better than the previous device, use it | |
1207 | + best_device_id = d; | |
1208 | + } | |
1209 | + } | |
1207 | 1210 | |
1208 | - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator | |
1209 | - std::cout<<"Using device "<<best_device_id<<std::endl; | |
1210 | - HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1211 | - int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1212 | - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1213 | - } //otherwise continue using the CPU | |
1211 | + if(dev_count > 0 && prop.major != 9999){ //if the last device queried is not an emulator | |
1212 | + std::cout<<"Using device "<<best_device_id<<std::endl; | |
1213 | + HANDLE_ERROR(cudaSetDevice(best_device_id)); | |
1214 | + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | |
1215 | + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | |
1216 | + } //otherwise continue using the CPU | |
1214 | 1217 | |
1215 | - std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | |
1218 | + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | |
1219 | + } | |
1216 | 1220 | |
1217 | 1221 | progress = 0; |
1218 | 1222 | //memory allocation | ... | ... |
stim/envi/envi.h
... | ... | @@ -1441,16 +1441,16 @@ public: |
1441 | 1441 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1442 | 1442 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1443 | 1443 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1444 | - bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){ | |
1444 | + bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){ | |
1445 | 1445 | if (header.interleave == envi_header::BSQ){ |
1446 | 1446 | std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl; |
1447 | 1447 | exit(1); |
1448 | 1448 | } |
1449 | 1449 | else if (header.interleave == envi_header::BIL){ |
1450 | 1450 | if (header.data_type == envi_header::float32) |
1451 | - return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1451 | + return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1452 | 1452 | else if (header.data_type == envi_header::float64) |
1453 | - return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1453 | + return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1454 | 1454 | else{ |
1455 | 1455 | std::cout << "ERROR: unidentified data type" << std::endl; |
1456 | 1456 | exit(1); |
... | ... | @@ -1458,9 +1458,9 @@ public: |
1458 | 1458 | } |
1459 | 1459 | else if (header.interleave == envi_header::BIP){ |
1460 | 1460 | if (header.data_type == envi_header::float32) |
1461 | - return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1461 | + return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1462 | 1462 | else if (header.data_type == envi_header::float64) |
1463 | - return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS); | |
1463 | + return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS); | |
1464 | 1464 | else{ |
1465 | 1465 | std::cout << "ERROR: unidentified data type" << std::endl; |
1466 | 1466 | exit(1); |
... | ... | @@ -1474,7 +1474,7 @@ public: |
1474 | 1474 | /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix |
1475 | 1475 | /// @param avg is a pointer to memory of size B that stores the average spectrum |
1476 | 1476 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1477 | - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){ | |
1477 | + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){ | |
1478 | 1478 | if (header.interleave == envi_header::BSQ){ |
1479 | 1479 | std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl; |
1480 | 1480 | exit(1); |
... | ... | @@ -1488,9 +1488,9 @@ public: |
1488 | 1488 | |
1489 | 1489 | else if (header.interleave == envi_header::BIP){ |
1490 | 1490 | if (header.data_type == envi_header::float32) |
1491 | - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); | |
1491 | + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS); | |
1492 | 1492 | else if (header.data_type == envi_header::float64) |
1493 | - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); | |
1493 | + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS); | |
1494 | 1494 | else{ |
1495 | 1495 | std::cout << "ERROR: unidentified data type" << std::endl; |
1496 | 1496 | exit(1); | ... | ... |