#ifndef STIM_FLOW_H #define STIM_FLOW_H #include #include //STIM include #include #include #include #ifdef __CUDACC__ #include #include #endif namespace stim { template struct triple { A first; B second; C third; }; template struct bridge { std::vector v; // vertices' indices std::vector > V; // vertices' coordinates T l; // length T r; // radii T deltaP; // pressure drop T Q; // volume flow rate }; template class flow { private: // calculate the cofactor of elemen[row][col] void get_minor(T** src, T** dest, int row, int col, int order) { // index of element to be copied int rowCount = 0; int colCount = 0; for (int i = 0; i < order; i++) { if (i != row) { colCount = 0; for (int j = 0; j < order; j++) { // when j is not the element if (j != col) { dest[rowCount][colCount] = src[i][j]; colCount++; } } rowCount++; } } } // calculate the det() T determinant(T** mat, int order) { // degenate case when n = 1 if (order == 1) return mat[0][0]; T det = 0.0; // determinant value // allocate the cofactor matrix T** minor = (T**)malloc((order - 1) * sizeof(T*)); for (int i = 0; i < order - 1; i++) minor[i] = (T*)malloc((order - 1) * sizeof(T)); for (int i = 0; i < order; i++) { // get minor of element(0, i) get_minor(mat, minor, 0, i, order); // recursion det += (i % 2 == 1 ? -1.0 : 1.0) * mat[0][i] * determinant(minor, order - 1); } // release memory for (int i = 0; i < order - 1; i++) free(minor[i]); free(minor); return det; } public: T** C; // Conductance std::vector > Q; // volume flow rate std::vector QQ; // Q' vector std::vector P; // initial pressure std::vector pressure; // final pressure //std::vector > V; // velocity //std::vector > Q; // volume flow rate //std::vector > deltaP; // pressure drop flow() {} // default constructor void init(unsigned n_e, unsigned n_v) { C = new T*[n_v](); for (unsigned i = 0; i < n_v; i++) { C[i] = new T[n_v](); } QQ.resize(n_v); P.resize(n_v); pressure.resize(n_v); Q.resize(n_e); } void reset(unsigned n_v) { for (unsigned i = 0; i < n_v; i++) { for (unsigned j = 0; j < n_v; j++) { C[i][j] = 0; } } } void clear(unsigned n_v) { for (unsigned i = 0; i < n_v; i++) delete[] C[i]; delete[] C; } /// Calculate the inverse of A and store the result in C void inversion(T** A, int order, T* C) { #ifdef __CUDACC__ // convert from double pointer to single pointer, make it flat T* Aflat = (T*)malloc(order * order * sizeof(T)); for (unsigned i = 0; i < order; i++) for (unsigned j = 0; j < order; j++) Aflat[i * order + j] = A[i][j]; // create device pointer T* d_Aflat; // flat original matrix T* d_Cflat; // flat inverse matrix T** d_A; // put the flat original matrix into another array of pointer T** d_C; int *d_P; int *d_INFO; // allocate memory on device HANDLE_ERROR(cudaMalloc((void**)&d_Aflat, order * order * sizeof(T))); HANDLE_ERROR(cudaMalloc((void**)&d_Cflat, order * order * sizeof(T))); HANDLE_ERROR(cudaMalloc((void**)&d_A, sizeof(T*))); HANDLE_ERROR(cudaMalloc((void**)&d_C, sizeof(T*))); HANDLE_ERROR(cudaMalloc((void**)&d_P, order * 1 * sizeof(int))); HANDLE_ERROR(cudaMalloc((void**)&d_INFO, 1 * sizeof(int))); // copy matrix from host to device HANDLE_ERROR(cudaMemcpy(d_Aflat, Aflat, order * order * sizeof(T), cudaMemcpyHostToDevice)); // copy matrix from device to device HANDLE_ERROR(cudaMemcpy(d_A, &d_Aflat, sizeof(T*), cudaMemcpyHostToDevice)); HANDLE_ERROR(cudaMemcpy(d_C, &d_Cflat, sizeof(T*), cudaMemcpyHostToDevice)); // calculate the inverse of matrix based on cuBLAS cublasHandle_t handle; CUBLAS_HANDLE_ERROR(cublasCreate_v2(&handle)); // create cuBLAS handle object CUBLAS_HANDLE_ERROR(cublasSgetrfBatched(handle, order, d_A, order, d_P, d_INFO, 1)); int INFO = 0; HANDLE_ERROR(cudaMemcpy(&INFO, d_INFO, sizeof(int), cudaMemcpyDeviceToHost)); if (INFO == order) { std::cout << "Factorization Failed : Matrix is singular." << std::endl; cudaDeviceReset(); exit(1); } CUBLAS_HANDLE_ERROR(cublasSgetriBatched(handle, order, (const T **)d_A, order, d_P, d_C, order, d_INFO, 1)); CUBLAS_HANDLE_ERROR(cublasDestroy_v2(handle)); // copy inverse matrix from device to device HANDLE_ERROR(cudaMemcpy(&d_Cflat, d_C, sizeof(T*), cudaMemcpyDeviceToHost)); // copy inverse matrix from device to host HANDLE_ERROR(cudaMemcpy(C, d_Cflat, order * order * sizeof(T), cudaMemcpyDeviceToHost)); // clear up free(Aflat); HANDLE_ERROR(cudaFree(d_Aflat)); HANDLE_ERROR(cudaFree(d_Cflat)); HANDLE_ERROR(cudaFree(d_A)); HANDLE_ERROR(cudaFree(d_C)); HANDLE_ERROR(cudaFree(d_P)); HANDLE_ERROR(cudaFree(d_INFO)); #else // get the determinant of a double det = 1.0 / determinant(A, order); // memory allocation T* tmp = (T*)malloc((order - 1)*(order - 1) * sizeof(T)); T** minor = (T**)malloc((order - 1) * sizeof(T*)); for (int i = 0; i < order - 1; i++) minor[i] = tmp + (i * (order - 1)); for (int j = 0; j < order; j++) { for (int i = 0; i < order; i++) { // get the co-factor (matrix) of A(j,i) get_minor(A, minor, j, i, order); C[i][j] = det * determinant(minor, order - 1); if ((i + j) % 2 == 1) C[i][j] = -C[i][j]; } } // release memory free(tmp); free(minor); #endif } }; } #endif //// calculate the flow rate of 3D model(circle cross section) //void calculate_flow_rate(unsigned e, T r) { // stim::triple tmp_Q; // tmp_Q.first = V[e].first; // copy the vertices information // tmp_Q.second = V[e].second; // tmp_Q.third = V[e].third * stim::PI * pow(r, 2); // UNITS: uL/s // Q.push_back(tmp_Q); // push back the volume flow rate information for every edge //} //// calculate the flow rate of 2D model(rectangular cross section) //void calculate_flow_rate(unsigned e, T r, T h) { // stim::triple tmp_Q; // tmp_Q.first = V[e].first; // copy the vertices information // tmp_Q.second = V[e].second; // tmp_Q.third = V[e].third * h * r; // UNITS: uL/s = mm^3/s // Q.push_back(tmp_Q); // push back the volume flow rate information for every edge //} //// calculate the pressure drop of 3D model(circle cross section) //void calculate_deltaP(unsigned e, T u, T l, T r) { // stim::triple tmp_deltaP; // tmp_deltaP.first = V[e].first; // copy the vertices information // tmp_deltaP.second = V[e].second; // tmp_deltaP.third = (8 * u * l * Q[e].third) / (stim::PI * pow(r, 4)); // UNITS: g/mm/s^2 = Pa // deltaP.push_back(tmp_deltaP); // push back the volume flow rate information for every edge //} //// calculate the pressure drop of 2D model(rectangular cross section) //void calculate_deltaP(unsigned e, T u, T l, T r, T h) { // stim::triple tmp_deltaP; // tmp_deltaP.first = V[e].first; // copy the vertices information // tmp_deltaP.second = V[e].second; // tmp_deltaP.third = (12 * u * l * Q[e].third) / (h * pow(r, 3)); // UNITS: g/mm/s^2 = Pa // deltaP.push_back(tmp_deltaP); // push back the volume flow rate information for every edge //} //// better way to do this??? //// find the maximum and minimum pressure positions //void find_max_min_pressure(size_t n_e, size_t n_v, unsigned& max, unsigned& min) { // std::vector P(n_v, FLT_MAX); // // set one to reference // P[Q[0].first] = 0.0; // unsigned first = 0; // unsigned second = 0; // // calculate all the relative pressure in brute force manner // for (unsigned e = 0; e < n_e; e++) { // // assuming the obj file stores in a straight order, in other words, like swc file // first = Q[e].first; // second = Q[e].second; // if (P[first] != FLT_MAX) // if pressure at start vertex is known // P[second] = P[first] - deltaP[e].third; // else if (P[second] != FLT_MAX) // if pressure at end vertex is known // P[first] = P[second] + deltaP[e].third; // } // // find the maximum and minimum pressure position // auto m1 = std::max_element(P.begin(), P.end()); // temporarily max number // auto m2 = std::min_element(P.begin(), P.end()); // temporarily min number // max = std::distance(P.begin(), m1); // min = std::distance(P.begin(), m2); // T tmp_m = *m2; // // Now set the lowest pressure port to reference pressure(0.0 Pa) // for (unsigned i = 0; i < n_v; i++) // P[i] -= tmp_m; // for (unsigned i = 0; i < n_v; i++) // pressure.push_back(P[i]); //}