Commit 1c92dffecd442c80bf3562f2958b9bd1aa775ac0
1 parent
f78ceb64
simplified CUDA implementation in covariance and projection calculations
Also updated stim::matrix to allow re-arranging of rows and columns, as well as saving to CSV files.
Showing
2 changed files
with
95 additions
and
101 deletions
Show diff stats
stim/envi/bip.h
@@ -1015,7 +1015,7 @@ public: | @@ -1015,7 +1015,7 @@ public: | ||
1015 | stat = cublasCreate(&handle); //create a cuBLAS instance | 1015 | stat = cublasCreate(&handle); //create a cuBLAS instance |
1016 | if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid | 1016 | if (stat != CUBLAS_STATUS_SUCCESS) return 1; //test the cuBLAS instance to make sure it is valid |
1017 | 1017 | ||
1018 | - else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl; | 1018 | + //else std::cout<<"Using cuBLAS to calculate the mean covariance matrix..."<<std::endl; |
1019 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel | 1019 | for (unsigned long long xy = 0; xy < XY; xy++){ //for each pixel |
1020 | if (mask == NULL || mask[xy] != 0){ | 1020 | if (mask == NULL || mask[xy] != 0){ |
1021 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location | 1021 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location |
@@ -1054,31 +1054,17 @@ public: | @@ -1054,31 +1054,17 @@ public: | ||
1054 | if(cuda_device >= 0){ //if a CUDA device is specified | 1054 | if(cuda_device >= 0){ //if a CUDA device is specified |
1055 | int dev_count; | 1055 | int dev_count; |
1056 | HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | 1056 | HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices |
1057 | - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1058 | - cudaDeviceProp prop; | ||
1059 | - //int best_device_id = 0; //stores the best CUDA device | ||
1060 | - //float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1061 | - std::cout<<"CUDA devices----"<<std::endl; | ||
1062 | - for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1063 | - cudaGetDeviceProperties(&prop, d); //get the property of the first device | ||
1064 | - //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1065 | - std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information | ||
1066 | - //if(cc > best_device_cc){ | ||
1067 | - // best_device_cc = cc; //if this is better than the previous device, use it | ||
1068 | - // best_device_id = d; | ||
1069 | - //} | ||
1070 | - } | ||
1071 | if(dev_count > 0 && dev_count > cuda_device){ //if the first device is not an emulator | 1057 | if(dev_count > 0 && dev_count > cuda_device){ //if the first device is not an emulator |
1058 | + cudaDeviceProp prop; | ||
1072 | cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device | 1059 | cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device |
1073 | if (prop.major != 9999) { | 1060 | if (prop.major != 9999) { |
1074 | - std::cout << "Using device " << cuda_device << std::endl; | 1061 | + std::cout << "Using CUDA device [" << cuda_device << "] to calculate the mean covariance matrix..."<<std::endl; |
1075 | HANDLE_ERROR(cudaSetDevice(cuda_device)); | 1062 | HANDLE_ERROR(cudaSetDevice(cuda_device)); |
1076 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | 1063 | int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix |
1077 | if (status == 0) return true; //if the cuBLAS function returned correctly, we're done | 1064 | if (status == 0) return true; //if the cuBLAS function returned correctly, we're done |
1078 | } | 1065 | } |
1079 | - } //otherwise continue using the CPU | ||
1080 | - | ||
1081 | - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl; | 1066 | + } //otherwise continue using the CPU |
1067 | + std::cout<<"WARNING: cuBLAS failed, using CPU"<<std::endl; | ||
1082 | } | 1068 | } |
1083 | //memory allocation | 1069 | //memory allocation |
1084 | unsigned long long XY = X() * Y(); | 1070 | unsigned long long XY = X() * Y(); |
@@ -1196,32 +1182,20 @@ public: | @@ -1196,32 +1182,20 @@ public: | ||
1196 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location | 1182 | /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location |
1197 | bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, int cuda_device = 0, bool PROGRESS = false){ | 1183 | bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, int cuda_device = 0, bool PROGRESS = false){ |
1198 | 1184 | ||
1199 | - if(cuda_device >= 0){ | 1185 | + if (cuda_device >= 0) { //if a CUDA device is specified |
1200 | int dev_count; | 1186 | int dev_count; |
1201 | HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | 1187 | HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices |
1202 | - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices | ||
1203 | - cudaDeviceProp prop; | ||
1204 | - //int best_device_id = 0; //stores the best CUDA device | ||
1205 | - //float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1206 | - std::cout<<"CUDA devices:"<<std::endl; | ||
1207 | - for(int d = 0; d < dev_count; d++){ //for each CUDA device | ||
1208 | - cudaGetDeviceProperties(&prop, d); //get the property of the first device | ||
1209 | - //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1210 | - std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information | ||
1211 | - //if(cc > best_device_cc){ | ||
1212 | - // best_device_cc = cc; //if this is better than the previous device, use it | ||
1213 | - // best_device_id = d; | ||
1214 | - //} | ||
1215 | - } | ||
1216 | - | ||
1217 | - if(dev_count > 0 && prop.major != 9999 && dev_count > cuda_device){ //if the first device is not an emulator | ||
1218 | - std::cout<<"Using device "<< cuda_device <<std::endl; | ||
1219 | - HANDLE_ERROR(cudaSetDevice(cuda_device)); | ||
1220 | - int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1221 | - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done | ||
1222 | - } //otherwise continue using the CPU | ||
1223 | - | ||
1224 | - std::cout<<"cuBLAS initialization failed - using CPU"<<std::endl; | 1188 | + if (dev_count > 0 && dev_count > cuda_device) { //if the first device is not an emulator |
1189 | + cudaDeviceProp prop; | ||
1190 | + cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device | ||
1191 | + if (prop.major != 9999) { | ||
1192 | + std::cout << "Using CUDA device [" << cuda_device << "] to calculate the noise covariance matrix..." << std::endl; | ||
1193 | + HANDLE_ERROR(cudaSetDevice(cuda_device)); | ||
1194 | + int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1195 | + if (status == 0) return true; //if the cuBLAS function returned correctly, we're done | ||
1196 | + } | ||
1197 | + } //otherwise continue using the CPU | ||
1198 | + std::cout << "WARNING: cuBLAS failed, using CPU" << std::endl; | ||
1225 | } | 1199 | } |
1226 | 1200 | ||
1227 | progress = 0; | 1201 | progress = 0; |
@@ -1277,8 +1251,8 @@ public: | @@ -1277,8 +1251,8 @@ public: | ||
1277 | /// @param mask is a character mask used to limit processing to valid pixels | 1251 | /// @param mask is a character mask used to limit processing to valid pixels |
1278 | bool project_cublas(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){ | 1252 | bool project_cublas(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){ |
1279 | 1253 | ||
1280 | - cudaError_t cudaStat; | ||
1281 | - cublasStatus_t stat; | 1254 | + //cudaError_t cudaStat; |
1255 | + //cublasStatus_t stat; | ||
1282 | cublasHandle_t handle; | 1256 | cublasHandle_t handle; |
1283 | 1257 | ||
1284 | std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file | 1258 | std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file |
@@ -1289,12 +1263,12 @@ public: | @@ -1289,12 +1263,12 @@ public: | ||
1289 | 1263 | ||
1290 | double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file | 1264 | double* s = (double*)malloc(sizeof(double) * B); //allocate space for the spectrum that will be pulled from the file |
1291 | double* s_dev; //declare a device pointer that will store the spectrum on the GPU | 1265 | double* s_dev; //declare a device pointer that will store the spectrum on the GPU |
1292 | - cudaStat = cudaMalloc(&s_dev, B * sizeof(double)); //allocate space on the CUDA device for the spectrum | 1266 | + HANDLE_ERROR(cudaMalloc(&s_dev, B * sizeof(double))); //allocate space on the CUDA device for the spectrum |
1293 | 1267 | ||
1294 | 1268 | ||
1295 | double* basis_dev; // device pointer on the GPU | 1269 | double* basis_dev; // device pointer on the GPU |
1296 | - cudaStat = cudaMalloc(&basis_dev, M * B * sizeof(double)); // allocate space on the CUDA device | ||
1297 | - cudaStat = cudaMemset(basis_dev, 0, M * B * sizeof(double)); // initialize basis_dev to zero (0) | 1270 | + HANDLE_ERROR(cudaMalloc(&basis_dev, M * B * sizeof(double))); // allocate space on the CUDA device |
1271 | + HANDLE_ERROR(cudaMemset(basis_dev, 0, M * B * sizeof(double))); // initialize basis_dev to zero (0) | ||
1298 | 1272 | ||
1299 | 1273 | ||
1300 | /// transposing basis matrix (because cuBLAS is column-major) | 1274 | /// transposing basis matrix (because cuBLAS is column-major) |
@@ -1303,28 +1277,24 @@ public: | @@ -1303,28 +1277,24 @@ public: | ||
1303 | for (int i = 0; i<M; i++) | 1277 | for (int i = 0; i<M; i++) |
1304 | for (int j = 0; j<B; j++) | 1278 | for (int j = 0; j<B; j++) |
1305 | basis_Transposed[i+j*M] = basis[i*B+j]; | 1279 | basis_Transposed[i+j*M] = basis[i*B+j]; |
1280 | + //copy the basis_Transposed matrix to the CUDA device (both matrices are stored in column-major format) | ||
1281 | + CUBLAS_HANDLE_ERROR(cublasSetMatrix((int)M, (int)B, sizeof(double),basis_Transposed, (int)M, basis_dev, (int)M)); | ||
1306 | 1282 | ||
1307 | - stat = cublasSetMatrix((int)M, (int)B, sizeof(double),basis_Transposed, (int)M, basis_dev, (int)M); //copy the basis_Transposed matrix to the CUDA device (both matrices are stored in column-major format) | ||
1308 | - | ||
1309 | - double* center_dev; //declare a device pointer that will store the center (average) | ||
1310 | - cudaStat = cudaMalloc(¢er_dev, B * sizeof(double)); //allocate space on the CUDA device for the center (average) | ||
1311 | - stat = cublasSetVector((int)B, sizeof(double), center, 1, center_dev, 1); //copy the center vector (average) to the CUDA device (from host to device) | 1283 | + double* center_dev; //declare a device pointer that will store the center (average) |
1284 | + HANDLE_ERROR(cudaMalloc(¢er_dev, B * sizeof(double))); //allocate space on the CUDA device for the center (average) | ||
1285 | + CUBLAS_HANDLE_ERROR(cublasSetVector((int)B, sizeof(double), center, 1, center_dev, 1)); //copy the center vector (average) to the CUDA device (from host to device) | ||
1312 | 1286 | ||
1313 | 1287 | ||
1314 | - double* A = (double*)malloc(sizeof(double) * M); //allocate space for the projected pixel on the host | ||
1315 | - double* A_dev; //declare a device pointer that will store the projected pixel on the GPU | ||
1316 | - cudaStat = cudaMalloc(&A_dev,M * sizeof(double)); //allocate space on the CUDA device for the projected pixel | ||
1317 | - cudaStat = cudaMemset(A_dev, 0,M * sizeof(double)); //initialize the projected pixel to zero (0) | 1288 | + double* A = (double*)malloc(sizeof(double) * M); //allocate space for the projected pixel on the host |
1289 | + double* A_dev; //declare a device pointer that will store the projected pixel on the GPU | ||
1290 | + HANDLE_ERROR(cudaMalloc(&A_dev,M * sizeof(double))); //allocate space on the CUDA device for the projected pixel | ||
1291 | + HANDLE_ERROR(cudaMemset(A_dev, 0,M * sizeof(double))); //initialize the projected pixel to zero (0) | ||
1318 | 1292 | ||
1319 | double axpy_alpha = -1; //multiplication factor for the center (in order to perform a subtraction) | 1293 | double axpy_alpha = -1; //multiplication factor for the center (in order to perform a subtraction) |
1320 | double axpy_alpha2 = 1; //multiplication factor for the matrix-vector multiplication | 1294 | double axpy_alpha2 = 1; //multiplication factor for the matrix-vector multiplication |
1321 | double axpy_beta = 0; //multiplication factor for the matrix-vector multiplication (there is no second scalor) | 1295 | double axpy_beta = 0; //multiplication factor for the matrix-vector multiplication (there is no second scalor) |
1322 | 1296 | ||
1323 | - stat = cublasCreate(&handle); //create a cuBLAS instance | ||
1324 | - if (stat != CUBLAS_STATUS_SUCCESS) { //test the cuBLAS instance to make sure it is valid | ||
1325 | - printf ("CUBLAS initialization failed\n"); | ||
1326 | - return EXIT_FAILURE; | ||
1327 | - } | 1297 | + CUBLAS_HANDLE_ERROR(cublasCreate(&handle)); //create a cuBLAS instance |
1328 | 1298 | ||
1329 | T* temp = (T*)malloc(sizeof(T) * M); //allocate space for the projected pixel to be written on the disc | 1299 | T* temp = (T*)malloc(sizeof(T) * M); //allocate space for the projected pixel to be written on the disc |
1330 | size_t i; | 1300 | size_t i; |
@@ -1332,14 +1302,11 @@ public: | @@ -1332,14 +1302,11 @@ public: | ||
1332 | if (mask == NULL || mask[xy] != 0){ | 1302 | if (mask == NULL || mask[xy] != 0){ |
1333 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location | 1303 | pixeld(s, xy); //retreive the spectrum at the current xy pixel location |
1334 | 1304 | ||
1335 | - stat = cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1); //copy the spectrum from the host to the device | ||
1336 | - stat = cublasDaxpy(handle, (int)B, &axpy_alpha, center_dev, 1, s_dev, 1); //subtract the center (average) | ||
1337 | - stat = cublasDgemv(handle,CUBLAS_OP_N,(int)M,(int)B,&axpy_alpha2,basis_dev,(int)M,s_dev,1,&axpy_beta,A_dev,1); //performs the matrix-vector multiplication | ||
1338 | - stat = cublasGetVector((int)M, sizeof(double), A_dev, 1, A, 1); //copy the projected pixel to the host (from GPU to CPU) | ||
1339 | - | ||
1340 | - //stat = cublasGetVector((int)B, sizeof(double), s_dev, 1, A, 1); //copy the projected pixel to the host (from GPU to CPU) | ||
1341 | - | ||
1342 | - //std::copy<double*, T*>(A, A + M, temp); | 1305 | + CUBLAS_HANDLE_ERROR(cublasSetVector((int)B, sizeof(double), s, 1, s_dev, 1)); //copy the spectrum from the host to the device |
1306 | + CUBLAS_HANDLE_ERROR(cublasDaxpy(handle, (int)B, &axpy_alpha, center_dev, 1, s_dev, 1)); //subtract the center (average) | ||
1307 | + CUBLAS_HANDLE_ERROR(cublasDgemv(handle,CUBLAS_OP_N,(int)M,(int)B,&axpy_alpha2,basis_dev,(int)M,s_dev,1,&axpy_beta,A_dev,1)); //performs the matrix-vector multiplication | ||
1308 | + CUBLAS_HANDLE_ERROR(cublasGetVector((int)M, sizeof(double), A_dev, 1, A, 1)); //copy the projected pixel to the host (from GPU to CPU) | ||
1309 | + | ||
1343 | for(i = 0; i < M; i++) temp[i] = (T)A[i]; //casting projected pixel from double to whatever T is | 1310 | for(i = 0; i < M; i++) temp[i] = (T)A[i]; //casting projected pixel from double to whatever T is |
1344 | } | 1311 | } |
1345 | else | 1312 | else |
@@ -1351,10 +1318,11 @@ public: | @@ -1351,10 +1318,11 @@ public: | ||
1351 | } | 1318 | } |
1352 | 1319 | ||
1353 | //clean up allocated device memory | 1320 | //clean up allocated device memory |
1354 | - cudaFree(A_dev); | ||
1355 | - cudaFree(s_dev); | ||
1356 | - cudaFree(basis_dev); | ||
1357 | - cudaFree(center_dev); | 1321 | + HANDLE_ERROR(cudaFree(A_dev)); |
1322 | + HANDLE_ERROR(cudaFree(s_dev)); | ||
1323 | + HANDLE_ERROR(cudaFree(basis_dev)); | ||
1324 | + HANDLE_ERROR(cudaFree(center_dev)); | ||
1325 | + CUBLAS_HANDLE_ERROR(cublasDestroy(handle)); | ||
1358 | free(A); | 1326 | free(A); |
1359 | free(s); | 1327 | free(s); |
1360 | free(temp); | 1328 | free(temp); |
@@ -1370,29 +1338,19 @@ public: | @@ -1370,29 +1338,19 @@ public: | ||
1370 | /// @param M is the number of basis vectors | 1338 | /// @param M is the number of basis vectors |
1371 | /// @param mask is a character mask used to limit processing to valid pixels | 1339 | /// @param mask is a character mask used to limit processing to valid pixels |
1372 | bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, int cuda_device = 0, bool PROGRESS = false){ | 1340 | bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, int cuda_device = 0, bool PROGRESS = false){ |
1373 | - | ||
1374 | if (cuda_device >= 0) { //if a CUDA device is specified | 1341 | if (cuda_device >= 0) { //if a CUDA device is specified |
1375 | int dev_count; | 1342 | int dev_count; |
1376 | HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices | 1343 | HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices |
1377 | - std::cout << "Number of CUDA devices: " << dev_count << std::endl; //output the number of CUDA devices | ||
1378 | - cudaDeviceProp prop; | ||
1379 | - //int best_device_id = 0; //stores the best CUDA device | ||
1380 | - //float best_device_cc = 0.0f; //stores the compute capability of the best device | ||
1381 | - std::cout << "CUDA devices----" << std::endl; | ||
1382 | - for (int d = 0; d < dev_count; d++) { //for each CUDA device | ||
1383 | - cudaGetDeviceProperties(&prop, d); //get the property of the first device | ||
1384 | - //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability | ||
1385 | - std::cout << d << ": [" << prop.major << "." << prop.minor << "] " << prop.name << std::endl; //display the device information | ||
1386 | - } | ||
1387 | if (dev_count > 0 && dev_count > cuda_device) { //if the first device is not an emulator | 1344 | if (dev_count > 0 && dev_count > cuda_device) { //if the first device is not an emulator |
1345 | + cudaDeviceProp prop; | ||
1388 | cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device | 1346 | cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device |
1389 | if (prop.major != 9999) { | 1347 | if (prop.major != 9999) { |
1390 | - std::cout << "Using device " << cuda_device << std::endl; | 1348 | + std::cout << "Using CUDA device [" << cuda_device << "] to perform a basis projection..." << std::endl; |
1391 | HANDLE_ERROR(cudaSetDevice(cuda_device)); | 1349 | HANDLE_ERROR(cudaSetDevice(cuda_device)); |
1392 | - int status = project_cublas(outfile, center, basis, M, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix | ||
1393 | - if (status == 0) return true; //if the cuBLAS function returned correctly, we're done | 1350 | + return project_cublas(outfile, center, basis, M, mask, PROGRESS); |
1394 | } | 1351 | } |
1395 | - } //otherwise continue using the CPU | 1352 | + } //otherwise continue using the CPU |
1353 | + std::cout << "WARNING: cuBLAS failed, using CPU" << std::endl; | ||
1396 | } | 1354 | } |
1397 | 1355 | ||
1398 | std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file | 1356 | std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file |
stim/math/matrix.h
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | #include <iostream> | 6 | #include <iostream> |
7 | #include <stim/math/vector.h> | 7 | #include <stim/math/vector.h> |
8 | #include <stim/math/vec3.h> | 8 | #include <stim/math/vec3.h> |
9 | -#include <stim/cuda/cudatools/callable.h> | 9 | +//#include <stim/cuda/cudatools/callable.h> |
10 | 10 | ||
11 | namespace stim{ | 11 | namespace stim{ |
12 | 12 | ||
@@ -56,7 +56,7 @@ public: | @@ -56,7 +56,7 @@ public: | ||
56 | return C; | 56 | return C; |
57 | } | 57 | } |
58 | 58 | ||
59 | - T& operator()(int row, int col) { | 59 | + T& operator()(size_t row, size_t col) { |
60 | return at(row, col); | 60 | return at(row, col); |
61 | } | 61 | } |
62 | 62 | ||
@@ -122,10 +122,11 @@ public: | @@ -122,10 +122,11 @@ public: | ||
122 | 122 | ||
123 | matrix<T> result(R, rhs.C); //create the output matrix | 123 | matrix<T> result(R, rhs.C); //create the output matrix |
124 | T inner; //stores the running inner product | 124 | T inner; //stores the running inner product |
125 | - for(size_t c = 0; c < rhs.C; c++){ | ||
126 | - for(size_t r = 0; r < R; r++){ | 125 | + size_t c, r, i; |
126 | + for(c = 0; c < rhs.C; c++){ | ||
127 | + for(r = 0; r < R; r++){ | ||
127 | inner = (T)0; | 128 | inner = (T)0; |
128 | - for(size_t i = 0; i < C; i++){ | 129 | + for(i = 0; i < C; i++){ |
129 | inner += at(r, i) * rhs.at(i, c); | 130 | inner += at(r, i) * rhs.at(i, c); |
130 | } | 131 | } |
131 | result.M[c * R + r] = inner; | 132 | result.M[c * R + r] = inner; |
@@ -151,23 +152,58 @@ public: | @@ -151,23 +152,58 @@ public: | ||
151 | return result; | 152 | return result; |
152 | } | 153 | } |
153 | 154 | ||
154 | - std::string toStr() | ||
155 | - { | 155 | + /// Sort rows of the matrix by the specified indices |
156 | + matrix<T> sort_rows(size_t* idx) { | ||
157 | + matrix<T> result(C, R); //create the output matrix | ||
158 | + size_t r, c; | ||
159 | + for (c = 0; c < C; c++) { //for each column | ||
160 | + for (r = 0; r < R; r++) { //for each row element | ||
161 | + result.M[c * R + r] = M[c * R + idx[r]]; //copy each element of the row into its new position | ||
162 | + } | ||
163 | + } | ||
164 | + return result; | ||
165 | + } | ||
166 | + | ||
167 | + /// Sort columns of the matrix by the specified indices | ||
168 | + matrix<T> sort_cols(size_t* idx) { | ||
169 | + matrix<T> result(C, R); | ||
170 | + size_t c; | ||
171 | + for (c = 0; c < C; c++) { //for each column | ||
172 | + memcpy(&result.M[c * R], &M[idx[c] * R], sizeof(T) * R); //copy the entire column from this matrix to the appropriate location | ||
173 | + } | ||
174 | + return result; | ||
175 | + } | ||
176 | + | ||
177 | + std::string toStr() { | ||
156 | std::stringstream ss; | 178 | std::stringstream ss; |
157 | 179 | ||
158 | - for(int r = 0; r < R; r++) | ||
159 | - { | 180 | + for(int r = 0; r < R; r++) { |
160 | ss << "| "; | 181 | ss << "| "; |
161 | - for(int c=0; c<C; c++) | ||
162 | - { | 182 | + for(int c=0; c<C; c++) { |
163 | ss << M[c * R + r] << " "; | 183 | ss << M[c * R + r] << " "; |
164 | } | 184 | } |
165 | ss << "|" << std::endl; | 185 | ss << "|" << std::endl; |
166 | } | 186 | } |
167 | - | ||
168 | return ss.str(); | 187 | return ss.str(); |
169 | } | 188 | } |
170 | 189 | ||
190 | + std::string csv() { | ||
191 | + std::stringstream csvss; | ||
192 | + for (size_t i = 0; i < B; i++) { | ||
193 | + csvss << M[i]; | ||
194 | + for (size_t j = 0; j < B; j++) csvss << ", " << M[j * B + i]; | ||
195 | + csvss << std::endl; | ||
196 | + } | ||
197 | + return csvss.str(); | ||
198 | + } | ||
199 | + | ||
200 | + //save the data as a CSV file | ||
201 | + void csv(std::string filename) { | ||
202 | + ofstream basisfile(filename.c_str()); | ||
203 | + basisfile << csv(); | ||
204 | + basisfile.close(); | ||
205 | + } | ||
206 | + | ||
171 | static matrix<T> I(size_t N) { | 207 | static matrix<T> I(size_t N) { |
172 | 208 | ||
173 | matrix<T> result(N, N); //create the identity matrix | 209 | matrix<T> result(N, N); //create the identity matrix |