Commit dd5aab2f31a019f6dff1d9f678fb511b8ee5c5d8

Authored by David Mayerich
1 parent 6c4afcac

fixed errors in scalarmie involving Bessel functions

stim/envi/bil.h
... ... @@ -1126,35 +1126,35 @@ public:
1126 1126 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1127 1127 /// @param avg is a pointer to memory of size B that stores the average spectrum
1128 1128 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1129   - bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
  1129 + bool co_matrix(double* co, double* avg, unsigned char *mask, int cuda_device = 0, bool PROGRESS = false){
1130 1130 progress = 0;
1131 1131  
1132   - if(use_gpu){
  1132 + if (cuda_device >= 0) { //if a CUDA device is specified
1133 1133 int dev_count;
1134 1134 HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
1135   - std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
  1135 + std::cout << "Number of CUDA devices: " << dev_count << std::endl; //output the number of CUDA devices
1136 1136 cudaDeviceProp prop;
1137   - int best_device_id = 0; //stores the best CUDA device
1138   - float best_device_cc = 0.0f; //stores the compute capability of the best device
1139   - std::cout<<"CUDA devices:"<<std::endl;
1140   - for(int d = 0; d < dev_count; d++){ //for each CUDA device
  1137 + //int best_device_id = 0; //stores the best CUDA device
  1138 + //float best_device_cc = 0.0f; //stores the compute capability of the best device
  1139 + std::cout << "CUDA devices----" << std::endl;
  1140 + for (int d = 0; d < dev_count; d++) { //for each CUDA device
1141 1141 cudaGetDeviceProperties(&prop, d); //get the property of the first device
1142   - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
1143   - std::cout<<"("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
1144   - if(cc > best_device_cc){
1145   - best_device_cc = cc; //if this is better than the previous device, use it
1146   - best_device_id = d;
  1142 + //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1143 + std::cout << d << ": [" << prop.major << "." << prop.minor << "] " << prop.name << std::endl; //display the device information
  1144 + //if(cc > best_device_cc){
  1145 + // best_device_cc = cc; //if this is better than the previous device, use it
  1146 + // best_device_id = d;
  1147 + //}
  1148 + }
  1149 + if (dev_count > 0 && dev_count > cuda_device) { //if the first device is not an emulator
  1150 + cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device
  1151 + if (prop.major != 9999) {
  1152 + std::cout << "Using device " << cuda_device << std::endl;
  1153 + HANDLE_ERROR(cudaSetDevice(cuda_device));
  1154 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1155 + if (status == 0) return true; //if the cuBLAS function returned correctly, we're done
1147 1156 }
1148   - }
1149   -
1150   - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
1151   - std::cout<<"Using device "<<best_device_id<<std::endl;
1152   - HANDLE_ERROR(cudaSetDevice(best_device_id));
1153   - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1154   - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1155   - } //otherwise continue using the CPU
1156   -
1157   - std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
  1157 + }
1158 1158 }
1159 1159  
1160 1160 //memory allocation
... ...
stim/envi/bip.h
... ... @@ -957,7 +957,7 @@ public:
957 957  
958 958 /// @param p is a pointer to pre-allocated memory of size [B * sizeof(T)] that stores the mean spectrum
959 959 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
960   - bool mean_spectrum(double* m, double* std, unsigned char* mask = NULL, bool PROGRESS = false){
  960 + bool mean_spectrum(double* m, double* std = NULL, unsigned char* mask = NULL, bool PROGRESS = false){
961 961 unsigned long long XY = X() * Y(); //calculate the total number of pixels in the HSI
962 962 T* temp = (T*)malloc(sizeof(T) * Z()); //allocate space for the current spectrum to be read
963 963 memset(m, 0, Z() * sizeof(double)); //set the mean spectrum to zero
... ... @@ -979,8 +979,10 @@ public:
979 979 }
980 980  
981 981 //calculate the standard deviation
982   - for(size_t i = 0; i < Z(); i++)
983   - std[i] = sqrt(e_x2[i] - m[i] * m[i]);
  982 + if (std != NULL) {
  983 + for (size_t i = 0; i < Z(); i++)
  984 + std[i] = sqrt(e_x2[i] - m[i] * m[i]);
  985 + }
984 986  
985 987 free(temp);
986 988 return true;
... ... @@ -1046,32 +1048,34 @@ public:
1046 1048 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1047 1049 /// @param avg is a pointer to memory of size B that stores the average spectrum
1048 1050 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1049   - bool co_matrix(double* co, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
  1051 + bool co_matrix(double* co, double* avg, unsigned char *mask, int cuda_device = 0, bool PROGRESS = false){
1050 1052 progress = 0;
1051 1053  
1052   - if(use_gpu){
  1054 + if(cuda_device >= 0){ //if a CUDA device is specified
1053 1055 int dev_count;
1054 1056 HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
1055 1057 std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1056 1058 cudaDeviceProp prop;
1057   - int best_device_id = 0; //stores the best CUDA device
1058   - float best_device_cc = 0.0f; //stores the compute capability of the best device
  1059 + //int best_device_id = 0; //stores the best CUDA device
  1060 + //float best_device_cc = 0.0f; //stores the compute capability of the best device
1059 1061 std::cout<<"CUDA devices----"<<std::endl;
1060 1062 for(int d = 0; d < dev_count; d++){ //for each CUDA device
1061 1063 cudaGetDeviceProperties(&prop, d); //get the property of the first device
1062   - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1064 + //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
1063 1065 std::cout<<d<<": ["<<prop.major<<"."<<prop.minor<<"] "<<prop.name<<std::endl; //display the device information
1064   - if(cc > best_device_cc){
1065   - best_device_cc = cc; //if this is better than the previous device, use it
1066   - best_device_id = d;
1067   - }
  1066 + //if(cc > best_device_cc){
  1067 + // best_device_cc = cc; //if this is better than the previous device, use it
  1068 + // best_device_id = d;
  1069 + //}
1068 1070 }
1069   -
1070   - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
1071   - std::cout<<"Using device "<<best_device_id<<std::endl;
1072   - HANDLE_ERROR(cudaSetDevice(best_device_id));
1073   - int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1074   - if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1071 + if(dev_count > 0 && dev_count > cuda_device){ //if the first device is not an emulator
  1072 + cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device
  1073 + if (prop.major != 9999) {
  1074 + std::cout << "Using device " << cuda_device << std::endl;
  1075 + HANDLE_ERROR(cudaSetDevice(cuda_device));
  1076 + int status = co_matrix_cublas(co, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1077 + if (status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1078 + }
1075 1079 } //otherwise continue using the CPU
1076 1080  
1077 1081 std::cout<<"No supported CUDA devices found or cuBLAS failed. Using CPU"<<std::endl;
... ... @@ -1190,29 +1194,29 @@ public:
1190 1194 /// @param coN is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1191 1195 /// @param avg is a pointer to memory of size B that stores the average spectrum
1192 1196 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1193   - bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, bool use_gpu = true, bool PROGRESS = false){
  1197 + bool coNoise_matrix(double* coN, double* avg, unsigned char *mask, int cuda_device = 0, bool PROGRESS = false){
1194 1198  
1195   - if(use_gpu){
  1199 + if(cuda_device >= 0){
1196 1200 int dev_count;
1197 1201 HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
1198 1202 std::cout<<"Number of CUDA devices: "<<dev_count<<std::endl; //output the number of CUDA devices
1199 1203 cudaDeviceProp prop;
1200   - int best_device_id = 0; //stores the best CUDA device
1201   - float best_device_cc = 0.0f; //stores the compute capability of the best device
  1204 + //int best_device_id = 0; //stores the best CUDA device
  1205 + //float best_device_cc = 0.0f; //stores the compute capability of the best device
1202 1206 std::cout<<"CUDA devices:"<<std::endl;
1203 1207 for(int d = 0; d < dev_count; d++){ //for each CUDA device
1204 1208 cudaGetDeviceProperties(&prop, d); //get the property of the first device
1205   - float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1209 + //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
1206 1210 std::cout<<d<<": ("<<prop.major<<"."<<prop.minor<<") "<<prop.name<<std::endl; //display the device information
1207   - if(cc > best_device_cc){
1208   - best_device_cc = cc; //if this is better than the previous device, use it
1209   - best_device_id = d;
1210   - }
  1211 + //if(cc > best_device_cc){
  1212 + // best_device_cc = cc; //if this is better than the previous device, use it
  1213 + // best_device_id = d;
  1214 + //}
1211 1215 }
1212 1216  
1213   - if(dev_count > 0 && prop.major != 9999){ //if the first device is not an emulator
1214   - std::cout<<"Using device "<<best_device_id<<std::endl;
1215   - HANDLE_ERROR(cudaSetDevice(best_device_id));
  1217 + if(dev_count > 0 && prop.major != 9999 && dev_count > cuda_device){ //if the first device is not an emulator
  1218 + std::cout<<"Using device "<< cuda_device <<std::endl;
  1219 + HANDLE_ERROR(cudaSetDevice(cuda_device));
1216 1220 int status = coNoise_matrix_cublas(coN, avg, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
1217 1221 if(status == 0) return true; //if the cuBLAS function returned correctly, we're done
1218 1222 } //otherwise continue using the CPU
... ... @@ -1265,7 +1269,6 @@ public:
1265 1269 return true;
1266 1270 }
1267 1271  
1268   - #ifdef CUDA_FOUND
1269 1272 /// Project the spectra onto a set of basis functions
1270 1273 /// @param outfile is the name of the new binary output file that will be created
1271 1274 /// @param center is a spectrum about which the data set will be rotated (ex. when performing mean centering)
... ... @@ -1359,7 +1362,6 @@ public:
1359 1362  
1360 1363 return true;
1361 1364 }
1362   -#endif
1363 1365  
1364 1366 /// Project the spectra onto a set of basis functions
1365 1367 /// @param outfile is the name of the new binary output file that will be created
... ... @@ -1367,16 +1369,32 @@ public:
1367 1369 /// @param basis a set of basis vectors that the data set will be projected onto (after centering)
1368 1370 /// @param M is the number of basis vectors
1369 1371 /// @param mask is a character mask used to limit processing to valid pixels
1370   - bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, bool PROGRESS = false){
  1372 + bool project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask = NULL, int cuda_device = 0, bool PROGRESS = false){
1371 1373  
1372   -#ifdef CUDA_FOUND
1373   - int dev_count;
1374   - cudaGetDeviceCount(&dev_count); //get the number of CUDA devices
1375   - cudaDeviceProp prop;
1376   - cudaGetDeviceProperties(&prop, 0); //get the property of the first device
1377   - if(dev_count > 0 && prop.major != 9999) //if the first device is not an emulator
1378   - return project_cublas(outfile,center,basis,M,mask,PROGRESS); //use cuBLAS to calculate the covariance matrix
1379   -#endif
  1374 + if (cuda_device >= 0) { //if a CUDA device is specified
  1375 + int dev_count;
  1376 + HANDLE_ERROR(cudaGetDeviceCount(&dev_count)); //get the number of CUDA devices
  1377 + std::cout << "Number of CUDA devices: " << dev_count << std::endl; //output the number of CUDA devices
  1378 + cudaDeviceProp prop;
  1379 + //int best_device_id = 0; //stores the best CUDA device
  1380 + //float best_device_cc = 0.0f; //stores the compute capability of the best device
  1381 + std::cout << "CUDA devices----" << std::endl;
  1382 + for (int d = 0; d < dev_count; d++) { //for each CUDA device
  1383 + cudaGetDeviceProperties(&prop, d); //get the property of the first device
  1384 + //float cc = prop.major + prop.minor / 10.0f; //calculate the compute capability
  1385 + std::cout << d << ": [" << prop.major << "." << prop.minor << "] " << prop.name << std::endl; //display the device information
  1386 + }
  1387 + if (dev_count > 0 && dev_count > cuda_device) { //if the first device is not an emulator
  1388 + cudaGetDeviceProperties(&prop, cuda_device); //get the property of the requested CUDA device
  1389 + if (prop.major != 9999) {
  1390 + std::cout << "Using device " << cuda_device << std::endl;
  1391 + HANDLE_ERROR(cudaSetDevice(cuda_device));
  1392 + int status = project_cublas(outfile, center, basis, M, mask, PROGRESS); //use cuBLAS to calculate the covariance matrix
  1393 + if (status == 0) return true; //if the cuBLAS function returned correctly, we're done
  1394 + }
  1395 + } //otherwise continue using the CPU
  1396 + }
  1397 +
1380 1398 std::ofstream target(outfile.c_str(), std::ios::binary); //open the target binary file
1381 1399 //std::string headername = outfile + ".hdr"; //the header file name
1382 1400  
... ... @@ -1500,7 +1518,7 @@ public:
1500 1518  
1501 1519 file.seekg(jump_sample, std::ios::cur);
1502 1520  
1503   - if(PROGRESS) progress = (double)((y+1) * samples + x + 1) / (lines * samples) * 100;
  1521 + if(PROGRESS) progress = (double)(y * samples + x + 1) / (lines * samples) * 100;
1504 1522 }
1505 1523  
1506 1524 file.seekg(jump_line, std::ios::cur);
... ...
stim/envi/envi.h
... ... @@ -575,7 +575,9 @@ public:
575 575 }
576 576 }
577 577  
578   - void project(std::string outfile, double* center, double* basis, unsigned long long M, unsigned char* mask, bool PROGRESS = false){
  578 + /// Project an array of coefficients onto a basis matrix
  579 +
  580 + void project(std::string outfile, double* center, double* basis, size_t M, std::vector<double> bands, unsigned char* mask, int cuda_device = 0, bool PROGRESS = false) {
579 581 if(header.interleave == envi_header::BSQ){ //if the infile is bsq file
580 582 std::cout<<"ERROR: BSQ projection not supported"<<std::endl;
581 583 exit(1);
... ... @@ -588,9 +590,9 @@ public:
588 590  
589 591 else if(header.interleave == envi_header::BIP){ //if the infile is bip file
590 592 if(header.data_type ==envi_header::float32)
591   - ((bip<float>*)file)->project(outfile, center, basis, M, mask, PROGRESS);
  593 + ((bip<float>*)file)->project(outfile, center, basis, M, mask, cuda_device, PROGRESS);
592 594 else if(header.data_type == envi_header::float64)
593   - ((bip<double>*)file)->project(outfile, center, basis, M, mask, PROGRESS);
  595 + ((bip<double>*)file)->project(outfile, center, basis, M, mask, cuda_device, PROGRESS);
594 596 else{
595 597 std::cout<<"ERROR: unidentified data type"<<std::endl;
596 598 exit(1);
... ... @@ -599,7 +601,7 @@ public:
599 601  
600 602 stim::envi_header out_hdr = header;
601 603 out_hdr.bands = M; //set the number of bands in the output header
602   - out_hdr.wavelength.clear();
  604 + out_hdr.wavelength = bands;
603 605 out_hdr.band_names.clear();
604 606 out_hdr.save(outfile + ".hdr"); //save the output header
605 607 }
... ... @@ -1467,16 +1469,16 @@ public:
1467 1469 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1468 1470 /// @param avg is a pointer to memory of size B that stores the average spectrum
1469 1471 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1470   - bool co_matrix(double* co, double* avg, unsigned char* mask, bool use_gpu, bool PROGRESS = false){
  1472 + bool co_matrix(double* co, double* avg, unsigned char* mask, int cuda_device, bool PROGRESS = false){
1471 1473 if (header.interleave == envi_header::BSQ){
1472 1474 std::cout<<"ERROR: calculating the covariance matrix for a BSQ file is impractical; convert to BIL or BIP first"<<std::endl;
1473 1475 exit(1);
1474 1476 }
1475 1477 else if (header.interleave == envi_header::BIL){
1476 1478 if (header.data_type == envi_header::float32)
1477   - return ((bil<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
  1479 + return ((bil<float>*)file)->co_matrix(co, avg, mask, cuda_device, PROGRESS);
1478 1480 else if (header.data_type == envi_header::float64)
1479   - return ((bil<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
  1481 + return ((bil<double>*)file)->co_matrix(co, avg, mask, cuda_device, PROGRESS);
1480 1482 else{
1481 1483 std::cout << "ERROR: unidentified data type" << std::endl;
1482 1484 exit(1);
... ... @@ -1484,9 +1486,9 @@ public:
1484 1486 }
1485 1487 else if (header.interleave == envi_header::BIP){
1486 1488 if (header.data_type == envi_header::float32)
1487   - return ((bip<float>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
  1489 + return ((bip<float>*)file)->co_matrix(co, avg, mask, cuda_device, PROGRESS);
1488 1490 else if (header.data_type == envi_header::float64)
1489   - return ((bip<double>*)file)->co_matrix(co, avg, mask, use_gpu, PROGRESS);
  1491 + return ((bip<double>*)file)->co_matrix(co, avg, mask, cuda_device, PROGRESS);
1490 1492 else{
1491 1493 std::cout << "ERROR: unidentified data type" << std::endl;
1492 1494 exit(1);
... ... @@ -1500,7 +1502,7 @@ public:
1500 1502 /// @param co is a pointer to pre-allocated memory of size [B * B] that stores the resulting covariance matrix
1501 1503 /// @param avg is a pointer to memory of size B that stores the average spectrum
1502 1504 /// @param mask is a pointer to memory of size [X * Y] that stores the mask value at each pixel location
1503   - bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, bool use_gpu = true, bool PROGRESS = false){
  1505 + bool coNoise_matrix(double* coN, double* avg, unsigned char* mask, int cuda_device = 0, bool PROGRESS = false){
1504 1506 if (header.interleave == envi_header::BSQ){
1505 1507 std::cout<<"ERROR: calculating the covariance matrix of noise for a BSQ file is impractical; convert to BIP first"<<std::endl;
1506 1508 exit(1);
... ... @@ -1514,9 +1516,9 @@ public:
1514 1516  
1515 1517 else if (header.interleave == envi_header::BIP){
1516 1518 if (header.data_type == envi_header::float32)
1517   - return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
  1519 + return ((bip<float>*)file)->coNoise_matrix(coN, avg, mask, cuda_device, PROGRESS);
1518 1520 else if (header.data_type == envi_header::float64)
1519   - return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, use_gpu, PROGRESS);
  1521 + return ((bip<double>*)file)->coNoise_matrix(coN, avg, mask, cuda_device, PROGRESS);
1520 1522 else{
1521 1523 std::cout << "ERROR: unidentified data type" << std::endl;
1522 1524 exit(1);
... ...
stim/image/image.h
... ... @@ -2,8 +2,9 @@
2 2 #define STIM_IMAGE_H
3 3  
4 4 #ifdef USING_OPENCV
5   - #include <opencv2/core/core.hpp>
6   - #include <opencv2/highgui/highgui.hpp>
  5 + //#include <opencv2/core/core.hpp>
  6 + //#include <opencv2/highgui/highgui.hpp>
  7 + #include <opencv2/opencv.hpp>
7 8 #endif
8 9 #include <vector>
9 10 #include <iostream>
... ...
stim/math/bessel.h
... ... @@ -1508,6 +1508,8 @@ int cbessjyva(P v,complex&lt;P&gt; z,P &amp;vm,complex&lt;P&gt;*cjv,
1508 1508 return 0;
1509 1509 }
1510 1510  
  1511 +///Calculate the spherical bessel functions and their derivatives up to order v
  1512 +/// When allocating arrays to store the resulting values, arrays must be of size [v+2]
1511 1513 template<typename P>
1512 1514 int cbessjyva_sph(int v,complex<P> z,P &vm,complex<P>*cjv,
1513 1515 complex<P>*cyv,complex<P>*cjvp,complex<P>*cyvp)
... ...
stim/math/matrix.h
... ... @@ -28,13 +28,14 @@ class matrix {
28 28 }
29 29  
30 30 public:
  31 +
31 32 matrix(size_t rows, size_t cols) {
32 33 init(rows, cols); //initialize memory
33 34 }
34 35  
35 36 matrix(size_t rows, size_t cols, T* data) {
36 37 init(rows, cols);
37   - memcpy(M, rhs, R * C * sizeof(T));
  38 + memcpy(M, data, R * C * sizeof(T));
38 39 }
39 40  
40 41 matrix(const matrix<T>& cpy){
... ... @@ -60,6 +61,9 @@ public:
60 61 }
61 62  
62 63 matrix<T> operator=(T rhs) {
  64 + if (&rhs == this)
  65 + return *this;
  66 + init(R, C);
63 67 size_t N = R * C;
64 68 for(size_t n=0; n<N; n++)
65 69 M[n] = rhs;
... ... @@ -164,6 +168,16 @@ public:
164 168 return ss.str();
165 169 }
166 170  
  171 + static matrix<T> I(size_t N) {
  172 +
  173 + matrix<T> result(N, N); //create the identity matrix
  174 + memset(result.M, 0, N * N * sizeof(T)); //set the entire matrix to zero
  175 + for (size_t n = 0; n < N; n++) {
  176 + result(n, n) = (T)1; //set the diagonal component to 1
  177 + }
  178 + return result;
  179 + }
  180 +
167 181 };
168 182  
169 183 } //end namespace rts
... ...
stim/optics/scalarmie.h
... ... @@ -16,15 +16,15 @@ void B_coefficients(stim::complex&lt;T&gt;* B, T a, T k, stim::complex&lt;T&gt; n, int Nl){
16 16  
17 17 //temporary variables
18 18 double vm; //allocate space to store the return values for the bessel function calculation
19   - double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
20   - double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
21   - double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
22   - double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  19 + double* j_ka = (double*) malloc( (Nl + 2) * sizeof(double) );
  20 + double* y_ka = (double*) malloc( (Nl + 2) * sizeof(double) );
  21 + double* dj_ka= (double*) malloc( (Nl + 2) * sizeof(double) );
  22 + double* dy_ka= (double*) malloc( (Nl + 2) * sizeof(double) );
23 23  
24   - stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
25   - stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
26   - stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
27   - stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  24 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
  25 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
  26 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
  27 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
28 28  
29 29 double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
30 30 stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
... ... @@ -45,21 +45,24 @@ void B_coefficients(stim::complex&lt;T&gt;* B, T a, T k, stim::complex&lt;T&gt; n, int Nl){
45 45 denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
46 46 B[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
47 47 }
  48 +
  49 + //free memory
  50 + free(j_ka); free(y_ka); free(dj_ka); free(dy_ka); free(j_kna); free(y_kna); free(dj_kna); free(dy_kna);
48 51 }
49 52  
50 53 template<typename T>
51 54 void A_coefficients(stim::complex<T>* A, T a, T k, stim::complex<T> n, int Nl){
52 55 //temporary variables
53 56 double vm; //allocate space to store the return values for the bessel function calculation
54   - double* j_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
55   - double* y_ka = (double*) malloc( (Nl + 1) * sizeof(double) );
56   - double* dj_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
57   - double* dy_ka= (double*) malloc( (Nl + 1) * sizeof(double) );
  57 + double* j_ka = (double*) malloc( (Nl + 2) * sizeof(double) );
  58 + double* y_ka = (double*) malloc( (Nl + 2) * sizeof(double) );
  59 + double* dj_ka= (double*) malloc( (Nl + 2) * sizeof(double) );
  60 + double* dy_ka= (double*) malloc( (Nl + 2) * sizeof(double) );
58 61  
59   - stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
60   - stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
61   - stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
62   - stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 1) * sizeof(stim::complex<double>) );
  62 + stim::complex<double>* j_kna = (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
  63 + stim::complex<double>* y_kna = (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
  64 + stim::complex<double>* dj_kna= (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
  65 + stim::complex<double>* dy_kna= (stim::complex<double>*) malloc( (Nl + 2) * sizeof(stim::complex<double>) );
63 66  
64 67 double ka = k * a; //store k*a (argument for spherical bessel and Hankel functions)
65 68 stim::complex<double> kna = k * n * a; //store k*n*a (argument for spherical bessel functions and derivatives)
... ... @@ -80,6 +83,8 @@ void A_coefficients(stim::complex&lt;T&gt;* A, T a, T k, stim::complex&lt;T&gt; n, int Nl){
80 83 denominator = j_kna[l] * dh_ka - h_ka * dj_kna[l] * (stim::complex<double>)n;
81 84 A[l] = (2 * l + 1) * pow(i, l) * numerator / denominator;
82 85 }
  86 + //free memory
  87 + free(j_ka); free(y_ka); free(dj_ka); free(dy_ka); free(j_kna); free(y_kna); free(dj_kna); free(dy_kna);
83 88 }
84 89  
85 90 #define LOCAL_NL 16
... ...