Commit dc99699ec99be17c0d8ff6d09a15b86522d28732

Authored by David Mayerich
1 parent 87d0a1d2

implemented a no-gpu option

Showing 3 changed files with 88 additions and 82 deletions   Show diff stats
stim/envi/bil.h
... ... @@ -1126,34 +1126,36 @@ public:
1126 1126 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1127 1127 /// @param avg is a pointer to memory of size B that stores the average spectrum
1128 1128 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1129   - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
  1129 + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
1130 1130 progress = 0;
1131 1131  
1132   - int dev_count;
1133   - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
1134   - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1135   - cudaDeviceProp prop;
1136   - int best_device_id = 0; //stores the best CUDA device
1137   - float best_device_cc = 0.0f; //stores the compute capability of the best device
1138   - std::cout<<"CUDA devices:"<<std::endl;
1139   - for(int d = 0; d < dev_count; d++){ //for each CUDA device
1140   - cudaGetDeviceProperties(&prop, d); //get the property of the first device
1141   - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
1142   - std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
1143   - if(cc > best_device_cc){
1144   - best_device_cc = cc; //if this is better than the previous device, use it
1145   - best_device_id = d;
1146   - }
1147   - }
  1132 + if(use_gpu){
  1133 + int dev_count;
  1134 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1135 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1136 + cudaDeviceProp prop;
  1137 + int best_device_id = 0; //stores the best CUDA device
  1138 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1139 + std::cout<<"CUDA devices:"<<std::endl;
  1140 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1141 + cudaGetDeviceProperties(&prop, d); //get the properties of device d
  1142 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1143 + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1144 + if(cc > best_device_cc){
  1145 + best_device_cc = cc; //if this is better than the previous device, use it
  1146 + best_device_id = d;
  1147 + }
  1148 + }
1148 1149  
1149   - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
1150   - std::cout<<"Using device "<<best_device_id<<std::endl;
1151   - HANDLE_ERROR(cudaSetDevice(best_device_id));
1152   - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1153   - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1154   - } //otherwise continue using the CPU
  1150 + if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator
  1151 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1152 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1153 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1154 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1155 + } //otherwise continue using the CPU
1155 1156  
1156   - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1157 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1158 + }
1157 1159  
1158 1160 //memory allocation
1159 1161 unsigned long long xy = X() * Y();
... ...
stim/envi/bip.h
... ... @@ -1044,34 +1044,36 @@ public:
1044 1044 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1045 1045 /// @param avg is a pointer to memory of size B that stores the average spectrum
1046 1046 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1047   - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){
  1047 + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
1048 1048 progress = 0;
1049 1049  
1050   - int dev_count;
1051   - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
1052   - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1053   - cudaDeviceProp prop;
1054   - int best_device_id = 0; //stores the best CUDA device
1055   - float best_device_cc = 0.0f; //stores the compute capability of the best device
1056   - std::cout<<"CUDA devices:"<<std::endl;
1057   - for(int d = 0; d < dev_count; d++){ //for each CUDA device
1058   - cudaGetDeviceProperties(&prop, d); //get the property of the first device
1059   - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
1060   - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
1061   - if(cc > best_device_cc){
1062   - best_device_cc = cc; //if this is better than the previous device, use it
1063   - best_device_id = d;
1064   - }
1065   - }
  1050 + if(use_gpu){
  1051 + int dev_count;
  1052 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1053 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1054 + cudaDeviceProp prop;
  1055 + int best_device_id = 0; //stores the best CUDA device
  1056 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1057 + std::cout<<"CUDA devices----"<<std::endl;
  1058 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1059 + cudaGetDeviceProperties(&prop, d); //get the properties of device d
  1060 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1061 + std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information
  1062 + if(cc > best_device_cc){
  1063 + best_device_cc = cc; //if this is better than the previous device, use it
  1064 + best_device_id = d;
  1065 + }
  1066 + }
1066 1067  
1067   - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
1068   - std::cout<<"Using device "<<best_device_id<<std::endl;
1069   - HANDLE_ERROR(cudaSetDevice(best_device_id));
1070   - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1071   - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1072   - } //otherwise continue using the CPU
  1068 + if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator
  1069 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1070 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1071 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1072 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1073 + } //otherwise continue using the CPU
1073 1074  
1074   - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1075 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1076 + }
1075 1077 //memory allocation
1076 1078 unsigned long long XY = X() * Y();
1077 1079 unsigned long long B = Z();
... ... @@ -1177,7 +1179,7 @@ public:
1177 1179 }
1178 1180 }
1179 1181  
1180   - return true;
  1182 + return 0;
1181 1183 }
1182 1184 //#endif
1183 1185  
... ... @@ -1186,33 +1188,35 @@ public:
1186 1188 /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1187 1189 /// @param avg is a pointer to memory of size B that stores the average spectrum
1188 1190 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1189   - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){
1190   -
1191   - int dev_count;
1192   - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
1193   - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1194   - cudaDeviceProp prop;
1195   - int best_device_id = 0; //stores the best CUDA device
1196   - float best_device_cc = 0.0f; //stores the compute capability of the best device
1197   - std::cout<<"CUDA devices:"<<std::endl;
1198   - for(int d = 0; d < dev_count; d++){ //for each CUDA device
1199   - cudaGetDeviceProperties(&prop, d); //get the property of the first device
1200   - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
1201   - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
1202   - if(cc > best_device_cc){
1203   - best_device_cc = cc; //if this is better than the previous device, use it
1204   - best_device_id = d;
1205   - }
1206   - }
  1191 + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
  1192 +
  1193 + if(use_gpu){
  1194 + int dev_count;
  1195 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1196 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1197 + cudaDeviceProp prop;
  1198 + int best_device_id = 0; //stores the best CUDA device
  1199 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1200 + std::cout<<"CUDA devices:"<<std::endl;
  1201 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1202 + cudaGetDeviceProperties(&prop, d); //get the properties of device d
  1203 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1204 + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1205 + if(cc > best_device_cc){
  1206 + best_device_cc = cc; //if this is better than the previous device, use it
  1207 + best_device_id = d;
  1208 + }
  1209 + }
1207 1210  
1208   - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
1209   - std::cout<<"Using device "<<best_device_id<<std::endl;
1210   - HANDLE_ERROR(cudaSetDevice(best_device_id));
1211   - int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1212   - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1213   - } //otherwise continue using the CPU
  1211 + if(dev_count > 0 && prop.major != 9999){ //if a device exists and the last device queried is not an emulator
  1212 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1213 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1214 + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1215 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1216 + } //otherwise continue using the CPU
1214 1217  
1215   - std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
  1218 + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
  1219 + }
1216 1220  
1217 1221 progress = 0;
1218 1222 //memory allocation
... ...
stim/envi/envi.h
... ... @@ -1441,16 +1441,16 @@ public:
1441 1441 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1442 1442 /// @param avg is a pointer to memory of size B that stores the average spectrum
1443 1443 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1444   - bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){
  1444 + bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){
1445 1445 if (header.interleave == envi_header::BSQ){
1446 1446 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
1447 1447 exit(1);
1448 1448 }
1449 1449 else if (header.interleave == envi_header::BIL){
1450 1450 if (header.data_type == envi_header::float32)
1451   - return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1451 + return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1452 1452 else if (header.data_type == envi_header::float64)
1453   - return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1453 + return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1454 1454 else{
1455 1455 std::cout << "ERROR: unidentified data type" << std::endl;
1456 1456 exit(1);
... ... @@ -1458,9 +1458,9 @@ public:
1458 1458 }
1459 1459 else if (header.interleave == envi_header::BIP){
1460 1460 if (header.data_type == envi_header::float32)
1461   - return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1461 + return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1462 1462 else if (header.data_type == envi_header::float64)
1463   - return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS);
  1463 + return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1464 1464 else{
1465 1465 std::cout << "ERROR: unidentified data type" << std::endl;
1466 1466 exit(1);
... ... @@ -1474,7 +1474,7 @@ public:
1474 1474 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1475 1475 /// @param avg is a pointer to memory of size B that stores the average spectrum
1476 1476 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1477   - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){
  1477 + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){
1478 1478 if (header.interleave == envi_header::BSQ){
1479 1479 std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
1480 1480 exit(1);
... ... @@ -1488,9 +1488,9 @@ public:
1488 1488  
1489 1489 else if (header.interleave == envi_header::BIP){
1490 1490 if (header.data_type == envi_header::float32)
1491   - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1491 + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
1492 1492 else if (header.data_type == envi_header::float64)
1493   - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS);
  1493 + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
1494 1494 else{
1495 1495 std::cout << "ERROR: unidentified data type" << std::endl;
1496 1496 exit(1);
... ...