Commit dc99699ec99be17c0d8ff6d09a15b86522d28732

Authored by David Mayerich
1 parent 87d0a1d2

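Implemented a no-GPU option: co_matrix and coNoise_matrix now take a use_gpu flag; when it is false, CUDA device detection and the cuBLAS path are skipped and the computation runs on the CPU.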

Showing 3 changed files with 88 additions and 82 deletions   Show diff stats
@@ -1126,34 +1126,36 @@ public: @@ -1126,34 +1126,36 @@ public:
1126 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix 1126 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1127 /// @param avg is a pointer to memory of size B that stores the average spectrum 1127 /// @param avg is a pointer to memory of size B that stores the average spectrum
1128 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location 1128 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1129 - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ 1129 + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
1130 progress = 0; 1130 progress = 0;
1131 1131
1132 - int dev_count;  
1133 - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices  
1134 - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices  
1135 - cudaDeviceProp prop;  
1136 - int best_device_id = 0; //stores the best CUDA device  
1137 - float best_device_cc = 0.0f; //stores the compute capability of the best device  
1138 - std::cout<<"CUDA devices:"<<std::endl;  
1139 - for(int d = 0; d < dev_count; d++){ //for each CUDA device  
1140 - cudaGetDeviceProperties(&prop, d); //get the property of the first device  
1141 - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability  
1142 - std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information  
1143 - if(cc > best_device_cc){  
1144 - best_device_cc = cc; //if this is better than the previous device, use it  
1145 - best_device_id = d;  
1146 - }  
1147 - } 1132 + if(use_gpu){
  1133 + int dev_count;
  1134 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1135 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1136 + cudaDeviceProp prop;
  1137 + int best_device_id = 0; //stores the best CUDA device
  1138 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1139 + std::cout<<"CUDA devices:"<<std::endl;
  1140 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1141 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1142 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1143 + std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1144 + if(cc > best_device_cc){
  1145 + best_device_cc = cc; //if this is better than the previous device, use it
  1146 + best_device_id = d;
  1147 + }
  1148 + }
1148 1149
1149 - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator  
1150 - std::cout<<"Using device "<<best_device_id<<std::endl;  
1151 - HANDLE_ERROR(cudaSetDevice(best_device_id));  
1152 - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix  
1153 - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done  
1154 - } //otherwise continue using the CPU 1150 + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
  1151 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1152 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1153 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1154 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1155 + } //otherwise continue using the CPU
1155 1156
1156 - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; 1157 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1158 + }
1157 1159
1158 //memory allocation 1160 //memory allocation
1159 unsigned long long xy = X() * Y(); 1161 unsigned long long xy = X() * Y();
@@ -1044,34 +1044,36 @@ public: @@ -1044,34 +1044,36 @@ public:
1044 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix 1044 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1045 /// @param avg is a pointer to memory of size B that stores the average spectrum 1045 /// @param avg is a pointer to memory of size B that stores the average spectrum
1046 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location 1046 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1047 - bool co_matrix(double* co, double* avg, unsigned char *mask, bool PROGRESS = false){ 1047 + bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
1048 progress = 0; 1048 progress = 0;
1049 1049
1050 - int dev_count;  
1051 - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices  
1052 - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices  
1053 - cudaDeviceProp prop;  
1054 - int best_device_id = 0; //stores the best CUDA device  
1055 - float best_device_cc = 0.0f; //stores the compute capability of the best device  
1056 - std::cout<<"CUDA devices:"<<std::endl;  
1057 - for(int d = 0; d < dev_count; d++){ //for each CUDA device  
1058 - cudaGetDeviceProperties(&prop, d); //get the property of the first device  
1059 - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability  
1060 - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information  
1061 - if(cc > best_device_cc){  
1062 - best_device_cc = cc; //if this is better than the previous device, use it  
1063 - best_device_id = d;  
1064 - }  
1065 - } 1050 + if(use_gpu){
  1051 + int dev_count;
  1052 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1053 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1054 + cudaDeviceProp prop;
  1055 + int best_device_id = 0; //stores the best CUDA device
  1056 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1057 + std::cout<<"CUDA devices----"<<std::endl;
  1058 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1059 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1060 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1061 + std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information
  1062 + if(cc > best_device_cc){
  1063 + best_device_cc = cc; //if this is better than the previous device, use it
  1064 + best_device_id = d;
  1065 + }
  1066 + }
1066 1067
1067 - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator  
1068 - std::cout<<"Using device "<<best_device_id<<std::endl;  
1069 - HANDLE_ERROR(cudaSetDevice(best_device_id));  
1070 - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix  
1071 - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done  
1072 - } //otherwise continue using the CPU 1068 + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
  1069 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1070 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1071 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1072 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1073 + } //otherwise continue using the CPU
1073 1074
1074 - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; 1075 + std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1076 + }
1075 //memory allocation 1077 //memory allocation
1076 unsigned long long XY = X() * Y(); 1078 unsigned long long XY = X() * Y();
1077 unsigned long long B = Z(); 1079 unsigned long long B = Z();
@@ -1177,7 +1179,7 @@ public: @@ -1177,7 +1179,7 @@ public:
1177 } 1179 }
1178 } 1180 }
1179 1181
1180 - return true; 1182 + return 0;
1181 } 1183 }
1182 //#endif 1184 //#endif
1183 1185
@@ -1186,33 +1188,35 @@ public: @@ -1186,33 +1188,35 @@ public:
1186 /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix 1188 /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1187 /// @param avg is a pointer to memory of size B that stores the average spectrum 1189 /// @param avg is a pointer to memory of size B that stores the average spectrum
1188 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location 1190 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1189 - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool PROGRESS = false){  
1190 -  
1191 - int dev_count;  
1192 - HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices  
1193 - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices  
1194 - cudaDeviceProp prop;  
1195 - int best_device_id = 0; //stores the best CUDA device  
1196 - float best_device_cc = 0.0f; //stores the compute capability of the best device  
1197 - std::cout<<"CUDA devices:"<<std::endl;  
1198 - for(int d = 0; d < dev_count; d++){ //for each CUDA device  
1199 - cudaGetDeviceProperties(&prop, d); //get the property of the first device  
1200 - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability  
1201 - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information  
1202 - if(cc > best_device_cc){  
1203 - best_device_cc = cc; //if this is better than the previous device, use it  
1204 - best_device_id = d;  
1205 - }  
1206 - } 1191 + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
  1192 +
  1193 + if(use_gpu){
  1194 + int dev_count;
  1195 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1196 + std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1197 + cudaDeviceProp prop;
  1198 + int best_device_id = 0; //stores the best CUDA device
  1199 + float best_device_cc = 0.0f; //stores the compute capability of the best device
  1200 + std::cout<<"CUDA devices:"<<std::endl;
  1201 + for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1202 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1203 + float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1204 + std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
  1205 + if(cc > best_device_cc){
  1206 + best_device_cc = cc; //if this is better than the previous device, use it
  1207 + best_device_id = d;
  1208 + }
  1209 + }
1207 1210
1208 - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator  
1209 - std::cout<<"Using device "<<best_device_id<<std::endl;  
1210 - HANDLE_ERROR(cudaSetDevice(best_device_id));  
1211 - int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix  
1212 - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done  
1213 - } //otherwise continue using the CPU 1211 + if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
  1212 + std::cout<<"Using device "<<best_device_id<<std::endl;
  1213 + HANDLE_ERROR(cudaSetDevice(best_device_id));
  1214 + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1215 + if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1216 + } //otherwise continue using the CPU
1214 1217
1215 - std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; 1218 + std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl;
  1219 + }
1216 1220
1217 progress = 0; 1221 progress = 0;
1218 //memory allocation 1222 //memory allocation
@@ -1441,16 +1441,16 @@ public: @@ -1441,16 +1441,16 @@ public:
1441 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix 1441 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1442 /// @param avg is a pointer to memory of size B that stores the average spectrum 1442 /// @param avg is a pointer to memory of size B that stores the average spectrum
1443 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location 1443 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1444 - bool co_matrix(double* co, double* avg, unsigned char* mask, bool PROGRESS = false){ 1444 + bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){
1445 if (header.interleave == envi_header::BSQ){ 1445 if (header.interleave == envi_header::BSQ){
1446 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl; 1446 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
1447 exit(1); 1447 exit(1);
1448 } 1448 }
1449 else if (header.interleave == envi_header::BIL){ 1449 else if (header.interleave == envi_header::BIL){
1450 if (header.data_type == envi_header::float32) 1450 if (header.data_type == envi_header::float32)
1451 - return ((bil<float>*)file)->co_matrix(co, avg, mask, PROGRESS); 1451 + return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1452 else if (header.data_type == envi_header::float64) 1452 else if (header.data_type == envi_header::float64)
1453 - return ((bil<double>*)file)->co_matrix(co, avg, mask, PROGRESS); 1453 + return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1454 else{ 1454 else{
1455 std::cout << "ERROR: unidentified data type" << std::endl; 1455 std::cout << "ERROR: unidentified data type" << std::endl;
1456 exit(1); 1456 exit(1);
@@ -1458,9 +1458,9 @@ public: @@ -1458,9 +1458,9 @@ public:
1458 } 1458 }
1459 else if (header.interleave == envi_header::BIP){ 1459 else if (header.interleave == envi_header::BIP){
1460 if (header.data_type == envi_header::float32) 1460 if (header.data_type == envi_header::float32)
1461 - return ((bip<float>*)file)->co_matrix(co, avg, mask, PROGRESS); 1461 + return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1462 else if (header.data_type == envi_header::float64) 1462 else if (header.data_type == envi_header::float64)
1463 - return ((bip<double>*)file)->co_matrix(co, avg, mask, PROGRESS); 1463 + return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
1464 else{ 1464 else{
1465 std::cout << "ERROR: unidentified data type" << std::endl; 1465 std::cout << "ERROR: unidentified data type" << std::endl;
1466 exit(1); 1466 exit(1);
@@ -1474,7 +1474,7 @@ public: @@ -1474,7 +1474,7 @@ public:
1474 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix 1474 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1475 /// @param avg is a pointer to memory of size B that stores the average spectrum 1475 /// @param avg is a pointer to memory of size B that stores the average spectrum
1476 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location 1476 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1477 - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool PROGRESS = false){ 1477 + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){
1478 if (header.interleave == envi_header::BSQ){ 1478 if (header.interleave == envi_header::BSQ){
1479 std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl; 1479 std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
1480 exit(1); 1480 exit(1);
@@ -1488,9 +1488,9 @@ public: @@ -1488,9 +1488,9 @@ public:
1488 1488
1489 else if (header.interleave == envi_header::BIP){ 1489 else if (header.interleave == envi_header::BIP){
1490 if (header.data_type == envi_header::float32) 1490 if (header.data_type == envi_header::float32)
1491 - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); 1491 + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
1492 else if (header.data_type == envi_header::float64) 1492 else if (header.data_type == envi_header::float64)
1493 - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, PROGRESS); 1493 + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
1494 else{ 1494 else{
1495 std::cout << "ERROR: unidentified data type" << std::endl; 1495 std::cout << "ERROR: unidentified data type" << std::endl;
1496 exit(1); 1496 exit(1);