SuperLU Distributed 9.0.0
gpu3d
|
Distributed SuperLU data types and function prototypes. More...
#include "superlu_defs.h"
Go to the source code of this file.
Classes | |
struct | dScalePermstruct_t |
struct | dLocalLU_t |
struct | dLUValSubBuf_t |
struct | dtrf3Dpartition_t |
struct | dLUstruct_t |
struct | pdgsmv_comm_t |
struct | dSOLVEstruct_t |
struct | dscuBufs_t |
struct | ddiagFactBufs_t |
struct | dxT_struct |
struct | dlsumBmod_buff_t |
Macros | |
#define | MAX_LOOKAHEADS 50 |
Typedefs | |
typedef struct dxT_struct | dxT_struct |
typedef struct dlsumBmod_buff_t | dlsumBmod_buff_t |
Functions | |
int_t | scuStatUpdate (int_t knsupc, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *stat) |
void | dCreate_CompCol_Matrix_dist (SuperMatrix *, int_t, int_t, int_t, double *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t) |
void | dCreate_CompRowLoc_Matrix_dist (SuperMatrix *, int_t, int_t, int_t, int_t, int_t, double *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t) |
void | dCompRow_to_CompCol_dist (int_t, int_t, int_t, double *, int_t *, int_t *, double **, int_t **, int_t **) |
void | dCompCol_to_CompRow_dist (int_t m, int_t n, int_t nnz, double *a, int_t *colptr, int_t *rowind, double **at, int_t **rowptr, int_t **colind) |
int | pdCompRow_loc_to_CompCol_global (int_t, SuperMatrix *, gridinfo_t *, SuperMatrix *) |
Gather A from the distributed compressed row format to global A in compressed column format. More... | |
void | dCopy_CompCol_Matrix_dist (SuperMatrix *, SuperMatrix *) |
void | dCreate_Dense_Matrix_dist (SuperMatrix *, int_t, int_t, double *, int_t, Stype_t, Dtype_t, Mtype_t) |
void | dCreate_SuperNode_Matrix_dist (SuperMatrix *, int_t, int_t, int_t, double *, int_t *, int_t *, int_t *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t) |
void | dCopy_Dense_Matrix_dist (int_t, int_t, double *, int_t, double *, int_t) |
void | dallocateA_dist (int_t, int_t, double **, int_t **, int_t **) |
void | dGenXtrue_dist (int_t, int_t, double *, int_t) |
void | dFillRHS_dist (char *, int_t, double *, int_t, SuperMatrix *, double *, int_t) |
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's. More... | |
int | dcreate_matrix (SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *) |
int | dcreate_matrix_rb (SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *) |
int | dcreate_matrix_dat (SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *) |
int | dcreate_matrix_postfix (SuperMatrix *, int, double **, int *, double **, int *, FILE *, char *, gridinfo_t *) |
void | dScalePermstructInit (const int_t, const int_t, dScalePermstruct_t *) |
Allocate storage in ScalePermstruct. More... | |
void | dScalePermstructFree (dScalePermstruct_t *) |
Deallocate ScalePermstruct. More... | |
void | dgsequ_dist (SuperMatrix *, double *, double *, double *, double *, double *, int *) |
double | dlangs_dist (char *, SuperMatrix *) |
void | dlaqgs_dist (SuperMatrix *, double *, double *, double, double, double, char *) |
void | pdgsequ (SuperMatrix *, double *, double *, double *, double *, double *, int *, gridinfo_t *) |
double | pdlangs (char *, SuperMatrix *, gridinfo_t *) |
void | pdlaqgs (SuperMatrix *, double *, double *, double, double, double, char *) |
int | pdPermute_Dense_Matrix (int_t, int_t, int_t[], int_t[], double[], int, double[], int, int, gridinfo_t *) |
Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-th row of B. More... | |
int | sp_dtrsv_dist (char *, char *, char *, SuperMatrix *, SuperMatrix *, double *, int *) |
int | sp_dgemv_dist (char *, double, SuperMatrix *, double *, int, double, double *, int) |
SpGEMV. More... | |
int | sp_dgemm_dist (char *, int, double, SuperMatrix *, double *, int, double, double *, int) |
float | ddistribute (superlu_dist_options_t *, int_t, SuperMatrix *, Glu_freeable_t *, dLUstruct_t *, gridinfo_t *) |
void | pdgssvx_ABglobal (superlu_dist_options_t *, SuperMatrix *, dScalePermstruct_t *, double *, int, int, gridinfo_t *, dLUstruct_t *, double *, SuperLUStat_t *, int *) |
float | pddistribute (superlu_dist_options_t *, int_t, SuperMatrix *, dScalePermstruct_t *, Glu_freeable_t *, dLUstruct_t *, gridinfo_t *) |
float | pddistribute_allgrid (superlu_dist_options_t *options, int_t n, SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask) |
float | pddistribute_allgrid_index_only (superlu_dist_options_t *options, int_t n, SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask) |
void | pdgssvx (superlu_dist_options_t *, SuperMatrix *, dScalePermstruct_t *, double *, int, int, gridinfo_t *, dLUstruct_t *, dSOLVEstruct_t *, double *, SuperLUStat_t *, int *) |
void | pdCompute_Diag_Inv (int_t, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *) |
int | dSolveInit (superlu_dist_options_t *, SuperMatrix *, int_t[], int_t[], int_t, dLUstruct_t *, gridinfo_t *, dSOLVEstruct_t *) |
Initialize the data structure for the solution phase. More... | |
void | dSolveFinalize (superlu_dist_options_t *, dSOLVEstruct_t *) |
Release the resources used for the solution phase. More... | |
void | dDestroy_A3d_gathered_on_2d (dSOLVEstruct_t *, gridinfo3d_t *) |
int_t | pdgstrs_init (int_t, int_t, int_t, int_t, int_t[], int_t[], gridinfo_t *grid, Glu_persist_t *, dSOLVEstruct_t *) |
int_t | pdgstrs_init_device_lsum_x (superlu_dist_options_t *, int_t, int_t, int_t, gridinfo_t *, dLUstruct_t *, dSOLVEstruct_t *, int *) |
int_t | pdgstrs_delete_device_lsum_x (dSOLVEstruct_t *) |
void | pxgstrs_finalize (pxgstrs_comm_t *) |
int | dldperm_dist (int, int, int_t, int_t[], int_t[], double[], int_t *, double[], double[]) |
int | dstatic_schedule (superlu_dist_options_t *, int, int, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int_t *, int_t *, int *) |
void | dLUstructInit (const int_t, dLUstruct_t *) |
Allocate storage in LUstruct. More... | |
void | dLUstructFree (dLUstruct_t *) |
Deallocate LUstruct. More... | |
void | dDestroy_LU (int_t, gridinfo_t *, dLUstruct_t *) |
Destroy distributed L & U matrices. More... | |
void | dDestroy_Tree (int_t, gridinfo_t *, dLUstruct_t *) |
Destroy broadcast and reduction trees used in triangular solve. More... | |
void | dscatter_l (int ib, int ljb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *usub, int_t *lsub, double *tempv, int *indirect_thread, int *indirect2, int_t **Lrowind_bc_ptr, double **Lnzval_bc_ptr, gridinfo_t *grid) |
void | dscatter_u (int ib, int jb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *lsub, int_t *usub, double *tempv, int_t **Ufstnz_br_ptr, double **Unzval_br_ptr, gridinfo_t *grid) |
int_t | pdgstrf (superlu_dist_options_t *, int, int, double anorm, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *) |
void | pdgstrs_Bglobal (superlu_dist_options_t *, int_t, dLUstruct_t *, gridinfo_t *, double *, int_t, int, SuperLUStat_t *, int *) |
void | pdgstrs (superlu_dist_options_t *, int_t, dLUstruct_t *, dScalePermstruct_t *, gridinfo_t *, double *, int_t, int_t, int_t, int, dSOLVEstruct_t *, SuperLUStat_t *, int *) |
void | pdgstrf2_trsm (superlu_dist_options_t *options, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, MPI_Request *, int tag_ub, SuperLUStat_t *, int *info) |
void | pdgstrs2_omp (int_t k0, int_t k, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, Ublock_info_t *, SuperLUStat_t *) |
int_t | pdReDistribute_B_to_X (double *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, double *x, dScalePermstruct_t *, Glu_persist_t *, gridinfo_t *, dSOLVEstruct_t *) |
void | dlsum_fmod (double *, double *, double *, double *, int, int, int_t, int *fmod, int_t, int_t, int_t, int_t *, gridinfo_t *, dLocalLU_t *, MPI_Request[], SuperLUStat_t *) |
void | dlsum_bmod (double *, double *, double *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, dLocalLU_t *, MPI_Request[], SuperLUStat_t *) |
void | dlsum_fmod_inv (double *, double *, double *, double *, int, int_t, int *fmod, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int) |
void | dlsum_fmod_inv_master (double *, double *, double *, double *, int, int, int_t, int *fmod, int_t, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int) |
void | dlsum_bmod_inv (double *, double *, double *, double *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int) |
void | dlsum_bmod_inv_master (double *, double *, double *, double *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t, int_t, int, int) |
void | dComputeLevelsets (int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, int_t *) |
void | pdconvertU (superlu_dist_options_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, int) |
void | dlsum_fmod_inv_gpu_wrap (int, int, int, int, double *, double *, int, int, int_t, int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, double *, int64_t *, double *, int64_t *, int_t *, int64_t *, int_t *, int *, gridinfo_t *, int_t, uint64_t *, uint64_t *, double *, double *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int) |
void | dlsum_bmod_inv_gpu_wrap (superlu_dist_options_t *, int, int, int, int, double *, double *, int, int, int_t, int *, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, int_t *, int64_t *, int_t *, int64_t *, double *, int64_t *, double *, int64_t *, double *, int64_t *, int_t *, int64_t *, int_t *, gridinfo_t *, int_t, uint64_t *, uint64_t *, double *, double *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int) |
void | pdgsrfs (superlu_dist_options_t *, int_t, SuperMatrix *, double, dLUstruct_t *, dScalePermstruct_t *, gridinfo_t *, double[], int_t, double[], int_t, int, dSOLVEstruct_t *, double *, SuperLUStat_t *, int *) |
void | pdgsrfs3d (superlu_dist_options_t *, int_t, SuperMatrix *, double, dLUstruct_t *, dScalePermstruct_t *, gridinfo3d_t *, dtrf3Dpartition_t *, double *, int_t, double *, int_t, int, dSOLVEstruct_t *, double *, SuperLUStat_t *, int *) |
void | pdgsrfs_ABXglobal (superlu_dist_options_t *, int_t, SuperMatrix *, double, dLUstruct_t *, gridinfo_t *, double *, int_t, double *, int_t, int, double *, SuperLUStat_t *, int *) |
int | pdgsmv_AXglobal_setup (SuperMatrix *, Glu_persist_t *, gridinfo_t *, int_t *, int_t *[], double *[], int_t *[], int_t[]) |
int | pdgsmv_AXglobal (int_t, int_t[], double[], int_t[], double[], double[]) |
int | pdgsmv_AXglobal_abs (int_t, int_t[], double[], int_t[], double[], double[]) |
void | pdgsmv_init (SuperMatrix *, int_t *, gridinfo_t *, pdgsmv_comm_t *) |
void | pdgsmv (int_t, SuperMatrix *, gridinfo_t *, pdgsmv_comm_t *, double x[], double ax[]) |
void | pdgsmv_finalize (pdgsmv_comm_t *) |
int_t | dinitLsumBmod_buff (int_t ns, int nrhs, dlsumBmod_buff_t *lbmod_buf) |
int_t | dleafForestBackSolve3d (superlu_dist_options_t *options, int_t treeId, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, double *recvbuf, MPI_Request *send_req, int nrhs, dlsumBmod_buff_t *lbmod_buf, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | dnonLeafForestBackSolve3d (int_t treeId, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, dxT_struct *xT_s, double *recvbuf, MPI_Request *send_req, int nrhs, dlsumBmod_buff_t *lbmod_buf, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | dlasum_bmod_Tree (int_t pTree, int_t cTree, double *lsum, double *x, dxT_struct *xT_s, int nrhs, dlsumBmod_buff_t *lbmod_buf, dLUstruct_t *LUstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat) |
int_t | dlsumForestBsolve (int_t k, int_t treeId, double *lsum, double *x, dxT_struct *xT_s, int nrhs, dlsumBmod_buff_t *lbmod_buf, dLUstruct_t *LUstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat) |
int_t | dbCastXk2Pck (int_t k, dxT_struct *xT_s, int nrhs, dLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer) |
int_t | dlsumReducePrK (int_t k, double *x, double *lsum, double *recvbuf, int nrhs, dLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer) |
int_t | dnonLeafForestForwardSolve3d (int_t treeId, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, dxT_struct *xT_s, double *recvbuf, double *rtemp, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | dleafForestForwardSolve3d (superlu_dist_options_t *options, int_t treeId, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, double *recvbuf, double *rtemp, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int | dtrs_compute_communication_structure (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, int *supernodeMask, gridinfo_t *grid, SuperLUStat_t *stat) |
int_t | dreduceSolvedX_newsolve (int_t treeId, int_t sender, int_t receiver, double *x, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, double *recvbuf, xtrsTimer_t *xtrsTimer) |
void | dlsum_fmod_leaf (int_t treeId, dtrf3Dpartition_t *trf3Dpartition, double *lsum, double *x, double *xk, double *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
void | dlsum_fmod_leaf_newsolve (dtrf3Dpartition_t *trf3Dpartition, double *lsum, double *x, double *xk, double *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
void | dlsum_bmod_GG (double *lsum, double *x, double *xk, int nrhs, dlsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
void | dlsum_bmod_GG_newsolve (dtrf3Dpartition_t *trf3Dpartition, double *lsum, double *x, double *xk, int nrhs, dlsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | pdReDistribute3d_B_to_X (double *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, double *x, dScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, dSOLVEstruct_t *SOLVEstruct) |
int_t | pdReDistribute3d_X_to_B (int_t n, double *B, int_t m_loc, int_t ldb, int_t fst_row, int nrhs, double *x, int_t *ilsum, dScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, dSOLVEstruct_t *SOLVEstruct) |
void | pdgstrs3d (superlu_dist_options_t *, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info) |
void | pdgstrs3d_newsolve (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info) |
int_t | pdgsTrBackSolve3d (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, dxT_struct *xT_s, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | pdgsTrForwardSolve3d (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, dxT_struct *xT_s, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | pdgsTrForwardSolve3d_newsolve (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | pdgsTrBackSolve3d_newsolve (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer) |
int_t | dbroadcastAncestor3d (dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dlocalSolveXkYk (trtype_t trtype, int_t k, double *x, int nrhs, dLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat) |
int_t | diBcastXk2Pck (int_t k, double *x, int nrhs, int **sendList, MPI_Request *send_req, dLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer) |
int_t | dtrs_B_init3d (int_t nsupers, double *x, int nrhs, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dtrs_X_gather3d (double *x, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer) |
int_t | dfsolveReduceLsum3d (int_t treeId, int_t sender, int_t receiver, double *lsum, double *recvbuf, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer) |
int_t | dbsolve_Xt_bcast (int_t ilvl, dxT_struct *xT_s, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer) |
int_t | dp2pSolvedX3d (int_t treeId, int_t sender, int_t receiver, double *x, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer) |
double * | doubleMalloc_dist (int_t) |
double * | doubleCalloc_dist (int_t) |
void * | duser_malloc_dist (int_t, int_t) |
void | duser_free_dist (int_t, int_t) |
int_t | dQuerySpace_dist (int_t, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, superlu_dist_mem_usage_t *) |
void | dClone_CompRowLoc_Matrix_dist (SuperMatrix *, SuperMatrix *) |
void | dCopy_CompRowLoc_Matrix_dist (SuperMatrix *, SuperMatrix *) |
void | dZero_CompRowLoc_Matrix_dist (SuperMatrix *) |
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,...,n. More... | |
void | dScaleAddId_CompRowLoc_Matrix_dist (SuperMatrix *, double) |
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,j} for i,j=1,...,n and \delta_{i,j} is the Kronecker delta. More... | |
void | dScaleAdd_CompRowLoc_Matrix_dist (SuperMatrix *, SuperMatrix *, double) |
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,j} + B_{i,j} for i,j=1,...,n. More... | |
void | dZeroLblocks (int, int, gridinfo_t *, dLUstruct_t *) |
Sets all entries of matrix L to zero. More... | |
void | dZeroUblocks (int iam, int n, gridinfo_t *, dLUstruct_t *) |
Sets all entries of matrix U to zero. More... | |
double | dMaxAbsLij (int iam, int n, Glu_persist_t *, dLUstruct_t *, gridinfo_t *) |
Find max(abs(L(i,j))) More... | |
double | dMaxAbsUij (int iam, int n, Glu_persist_t *, dLUstruct_t *, gridinfo_t *) |
Find max(abs(U(i,j))) More... | |
void | dfill_dist (double *, int_t, double) |
Fills a double precision array with a given value. More... | |
void | dinf_norm_error_dist (int_t, int_t, double *, int_t, double *, int_t, gridinfo_t *) |
Check the inf-norm of the error vector. More... | |
void | pdinf_norm_error (int, int_t, int_t, double[], int_t, double[], int_t, MPI_Comm) |
Check the inf-norm of the error vector. More... | |
void | dreadhb_dist (int, FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **) |
void | dreadtriple_dist (FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **) |
void | dreadtriple_noheader (FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **) |
void | dreadrb_dist (int, FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **) |
void | dreadMM_dist (FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **) |
int | dread_binary (FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **) |
void | validateInput_pdgssvx3d (superlu_dist_options_t *, SuperMatrix *A, int ldb, int nrhs, gridinfo3d_t *, int *info) |
Validates the input parameters for a given problem. More... | |
void | dallocScalePermstruct_RC (dScalePermstruct_t *, int_t m, int_t n) |
void | dscaleMatrixDiagonally (fact_t Fact, dScalePermstruct_t *, SuperMatrix *, SuperLUStat_t *, gridinfo_t *, int *rowequ, int *colequ, int *iinfo) |
void | dperform_row_permutation (superlu_dist_options_t *, fact_t Fact, dScalePermstruct_t *, dLUstruct_t *LUstruct, int_t m, int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, int job, int Equil, int *rowequ, int *colequ, int *iinfo) |
double | dcomputeA_Norm (int notran, SuperMatrix *, gridinfo_t *) |
This function computes the norm of a matrix A. More... | |
float | ddist_psymbtonum (superlu_dist_options_t *, int_t, SuperMatrix *, dScalePermstruct_t *, Pslu_freeable_t *, dLUstruct_t *, gridinfo_t *) |
void | pdGetDiagU (int_t, dLUstruct_t *, gridinfo_t *, double *) |
int | d_c2cpp_GetHWPM (SuperMatrix *, gridinfo_t *, dScalePermstruct_t *) |
void | dPrintLblocks (int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *) |
Print the blocks in the factored matrix L. More... | |
void | dPrintUblocks (int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *) |
Print the blocks in the factored matrix U. More... | |
void | dPrint_CompCol_Matrix_dist (SuperMatrix *) |
void | dPrint_Dense_Matrix_dist (SuperMatrix *) |
int | dPrint_CompRowLoc_Matrix_dist (SuperMatrix *) |
int | file_dPrint_CompRowLoc_Matrix_dist (FILE *fp, SuperMatrix *A) |
void | Printdouble5 (char *, int_t, double *) |
int | file_Printdouble5 (FILE *, char *, int_t, double *) |
void | dGenCOOLblocks (int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, int_t **, int_t **, double **, int_t *, int_t *) |
void | dGenCSCLblocks (int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, double **, int_t **, int_t **, int_t *, int_t *) |
void | dGenCSRLblocks (int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, double **, int_t **, int_t **, int_t *, int_t *) |
void | nv_init_wrapper (MPI_Comm) |
void | dprepare_multiGPU_buffers (int, int, int, int, int, int) |
void | ddelete_multiGPU_buffers () |
int | dgemm_ (const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *) |
int | dtrsv_ (char *, char *, char *, int *, double *, int *, double *, int *) |
int | dtrsm_ (const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *) |
void | dgemv_ (const char *, const int *, const int *, const double *, const double *a, const int *, const double *, const int *, const double *, double *, const int *) |
void | dger_ (const int *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *) |
int | dscal_ (const int *n, const double *alpha, double *dx, const int *incx) |
int | daxpy_ (const int *n, const double *alpha, const double *x, const int *incx, double *y, const int *incy) |
int | superlu_dgemm (const char *transa, const char *transb, int m, int n, int k, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc) |
int | superlu_dtrsm (const char *sideRL, const char *uplo, const char *transa, const char *diag, const int m, const int n, const double alpha, const double *a, const int lda, double *b, const int ldb) |
int | superlu_dger (const int m, const int n, const double alpha, const double *x, const int incx, const double *y, const int incy, double *a, const int lda) |
int | superlu_dscal (const int n, const double alpha, double *x, const int incx) |
int | superlu_daxpy (const int n, const double alpha, const double *x, const int incx, double *y, const int incy) |
int | superlu_dgemv (const char *trans, const int m, const int n, const double alpha, const double *a, const int lda, const double *x, const int incx, const double beta, double *y, const int incy) |
int | superlu_dtrsv (char *uplo, char *trans, char *diag, int n, double *a, int lda, double *x, int incx) |
void | dtrtri_ (char *, char *, int *, double *, int *, int *) |
int | dcreate_matrix3d (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo3d_t *grid3d) |
int | dcreate_matrix_postfix3d (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d) |
int | dcreate_block_diag_3d (SuperMatrix *A, int batchCount, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d) |
int | dcreate_batch_systems (handle_t *SparseMatrix_handles, int batchCount, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d) |
void | dGatherNRformat_loc3d (fact_t Fact, NRformat_loc *A, double *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **) |
void | dGatherNRformat_loc3d_allgrid (fact_t Fact, NRformat_loc *A, double *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **) |
int | dScatter_B3d (NRformat_loc3d *A3d, gridinfo3d_t *grid3d) |
void | pdgssvx3d (superlu_dist_options_t *, SuperMatrix *, dScalePermstruct_t *, double B[], int ldb, int nrhs, gridinfo3d_t *, dLUstruct_t *, dSOLVEstruct_t *, double *berr, SuperLUStat_t *, int *info) |
int_t | pdgstrf3d (superlu_dist_options_t *, int m, int n, double anorm, dtrf3Dpartition_t *, SCT_t *, dLUstruct_t *, gridinfo3d_t *, SuperLUStat_t *, int *) |
void | dInit_HyP (superlu_dist_options_t *, HyP_t *HyP, dLocalLU_t *Llu, int_t mcb, int_t mrb) |
void | Free_HyP (HyP_t *HyP) |
int | updateDirtyBit (int_t k0, HyP_t *HyP, gridinfo_t *grid) |
void | dblock_gemm_scatter (int_t lb, int_t j, Ublock_info_t *Ublock_info, Remain_info_t *Remain_info, double *L_mat, int ldl, double *U_mat, int ldu, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int_t thread_id, int *indirect, int *indirect2, int_t **Lrowind_bc_ptr, double **Lnzval_bc_ptr, int_t **Ufstnz_br_ptr, double **Unzval_br_ptr, int_t *xsup, gridinfo_t *, SuperLUStat_t *) |
int_t | dblock_gemm_scatterTopLeft (int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *) |
int_t | dblock_gemm_scatterTopRight (int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *) |
int_t | dblock_gemm_scatterBottomLeft (int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *) |
int_t | dblock_gemm_scatterBottomRight (int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *) |
void | dgather_u (int_t num_u_blks, Ublock_info_t *Ublock_info, int_t *usub, double *uval, double *bigU, int_t ldu, int_t *xsup, int_t klst) |
void | dgather_l (int_t num_LBlk, int_t knsupc, Remain_info_t *L_info, double *lval, int_t LD_lval, double *L_buff) |
void | dRgather_L (int_t k, int_t *lsub, double *lusup, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno) |
void | dRgather_U (int_t k, int_t jj0, int_t *usub, double *uval, double *bigU, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno, int_t *perm_u) |
void | dbcastPermutedSparseA (SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
void | dnewTrfPartitionInit (int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
dtrf3Dpartition_t * | dinitTrf3Dpartition (int_t nsupers, superlu_dist_options_t *options, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
dtrf3Dpartition_t * | dinitTrf3Dpartition_allgrid (int_t n, superlu_dist_options_t *options, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
dtrf3Dpartition_t * | dinitTrf3DpartitionLUstructgrid0 (int_t n, superlu_dist_options_t *options, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
void | dDestroy_trf3Dpartition (dtrf3Dpartition_t *trf3Dpartition) |
void | d3D_printMemUse (dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
void | dinit3DLUstructForest (int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t **sForests, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dgatherAllFactoredLUFr (int_t *myZeroTrIdxs, sForest_t *sForests, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dLpanelUpdate (int_t off0, int_t nsupc, double *ublk_ptr, int_t ld_ujrow, double *lusup, int_t nsupr, SCT_t *) |
void | dgstrf2 (int_t k, double *diagBlk, int_t LDA, double *BlockUfactor, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) |
void | Local_Dgstrf2 (superlu_dist_options_t *options, int_t k, double thresh, double *BlockUFactor, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t *, int *info, SCT_t *) |
int_t | dTrs2_GatherU (int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, double *uval, double *tempv) |
int_t | dTrs2_ScatterU (int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, double *uval, double *tempv) |
int_t | dTrs2_GatherTrsmScatter (int_t klst, int_t iukp, int_t rukp, int_t *usub, double *uval, double *tempv, int_t knsupc, int nsupr, double *lusup, Glu_persist_t *Glu_persist) |
void | pdgstrs2 (int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, dLocalLU_t *Llu, SuperLUStat_t *stat) |
void | pdgstrf2 (superlu_dist_options_t *, int_t nsupers, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *) |
int_t | dAllocLlu_3d (int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dp3dScatter (int_t n, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask) |
int_t | dscatter3dLPanels (int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask) |
int_t | dscatter3dUPanels (int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask) |
int_t | dcollect3dLpanels (int_t layer, int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dcollect3dUpanels (int_t layer, int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dp3dCollect (int_t layer, int_t n, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dzeroSetLU (int_t nnodes, int_t *nodeList, dLUstruct_t *, gridinfo3d_t *) |
int | dAllocGlu_3d (int_t n, int_t nsupers, dLUstruct_t *) |
int | dDeAllocLlu_3d (int_t n, dLUstruct_t *, gridinfo3d_t *) |
int | dDeAllocGlu_3d (dLUstruct_t *) |
int_t | dreduceAncestors3d (int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, double *Lval_buf, double *Uval_buf, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int | dreduceAllAncestors3d (int_t ilvl, int_t *myNodeCount, int_t **treePerm, dLUValSubBuf_t *LUvsb, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dgatherFactoredLU (int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, dLUValSubBuf_t *LUvsb, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dgatherAllFactoredLU (dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dinit3DLUstruct (int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int_t | dzSendLPanel (int_t k, int_t receiver, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dzRecvLPanel (int_t k, int_t sender, double alpha, double beta, double *Lval_buf, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dzSendUPanel (int_t k, int_t receiver, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dzRecvUPanel (int_t k, int_t sender, double alpha, double beta, double *Uval_buf, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT) |
int_t | dIBcast_LPanel (int_t k, int_t k0, int_t *lsub, double *lusup, gridinfo_t *, int *msgcnt, MPI_Request *, int **ToSendR, int_t *xsup, int) |
int_t | dBcast_LPanel (int_t k, int_t k0, int_t *lsub, double *lusup, gridinfo_t *, int *msgcnt, int **ToSendR, int_t *xsup, SCT_t *, int) |
int_t | dIBcast_UPanel (int_t k, int_t k0, int_t *usub, double *uval, gridinfo_t *, int *msgcnt, MPI_Request *, int *ToSendD, int) |
int_t | dBcast_UPanel (int_t k, int_t k0, int_t *usub, double *uval, gridinfo_t *, int *msgcnt, int *ToSendD, SCT_t *, int) |
int_t | dIrecv_LPanel (int_t k, int_t k0, int_t *Lsub_buf, double *Lval_buf, gridinfo_t *, MPI_Request *, dLocalLU_t *, int) |
int_t | dIrecv_UPanel (int_t k, int_t k0, int_t *Usub_buf, double *, dLocalLU_t *, gridinfo_t *, MPI_Request *, int) |
int_t | dWait_URecv (MPI_Request *, int *msgcnt, SCT_t *) |
int_t | dWait_LRecv (MPI_Request *, int *msgcnt, int *msgcntsU, gridinfo_t *, SCT_t *) |
int_t | dISend_UDiagBlock (int_t k0, double *ublk_ptr, int_t size, MPI_Request *, gridinfo_t *, int) |
int_t | dRecv_UDiagBlock (int_t k0, double *ublk_ptr, int_t size, int_t src, gridinfo_t *, SCT_t *, int) |
int_t | dPackLBlock (int_t k, double *Dest, Glu_persist_t *, gridinfo_t *, dLocalLU_t *) |
int_t | dISend_LDiagBlock (int_t k0, double *lblk_ptr, int_t size, MPI_Request *, gridinfo_t *, int) |
int_t | dIRecv_UDiagBlock (int_t k0, double *ublk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int) |
int_t | dIRecv_LDiagBlock (int_t k0, double *L_blk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int) |
int_t | dUDiagBlockRecvWait (int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *) |
int_t | dDiagFactIBCast (int_t k, int_t k0, double *BlockUFactor, double *BlockLFactor, int *IrecvPlcd_D, MPI_Request *, MPI_Request *, MPI_Request *, MPI_Request *, gridinfo_t *, superlu_dist_options_t *, double thresh, dLUstruct_t *LUstruct, SuperLUStat_t *, int *info, SCT_t *, int tag_ub) |
int_t | dUPanelTrSolve (int_t k, double *BlockLFactor, double *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, SCT_t *) |
int_t | dLPanelUpdate (int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, double *BlockUFactor, gridinfo_t *, dLUstruct_t *, SCT_t *) |
int_t | dUPanelUpdate (int_t k, int *factored_U, MPI_Request *, double *BlockLFactor, double *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, SCT_t *) |
int_t | dIBcastRecvLPanel (int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Lsub_buf, double *Lval_buf, int *factored, gridinfo_t *, dLUstruct_t *, SCT_t *, int tag_ub) |
int_t | dIBcastRecvUPanel (int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Usub_buf, double *Uval_buf, gridinfo_t *, dLUstruct_t *, SCT_t *, int tag_ub) |
int_t | dWaitL (int_t k, int *msgcnt, int *msgcntU, MPI_Request *, MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *) |
int_t | dWaitU (int_t k, int *msgcnt, MPI_Request *, MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *) |
int_t | dLPanelTrSolve (int_t k, int *factored_L, double *BlockUFactor, gridinfo_t *, dLUstruct_t *) |
int | getNsupers (int, Glu_persist_t *) |
int_t | initPackLUInfo (int_t nsupers, packLUInfo_t *packLUInfo) |
int | freePackLUInfo (packLUInfo_t *packLUInfo) |
int_t | dSchurComplementSetup (int_t k, int *msgcnt, Ublock_info_t *, Remain_info_t *, uPanelInfo_t *, lPanelInfo_t *, int_t *, int_t *, int_t *, double *bigU, int_t *Lsub_buf, double *Lval_buf, int_t *Usub_buf, double *Uval_buf, gridinfo_t *, dLUstruct_t *) |
int_t | dSchurComplementSetupGPU (int_t k, msgs_t *msgs, packLUInfo_t *, int_t *, int_t *, int_t *, gEtreeInfo_t *, factNodelists_t *, dscuBufs_t *, dLUValSubBuf_t *LUvsb, gridinfo_t *, dLUstruct_t *, HyP_t *) |
double * | dgetBigV (int_t, int_t) |
double * | dgetBigU (superlu_dist_options_t *, int_t, gridinfo_t *, dLUstruct_t *) |
int_t | dLluBufInit (dLUValSubBuf_t *, dLUstruct_t *) |
int_t | dinitScuBufs (superlu_dist_options_t *, int_t ldt, int_t num_threads, int_t nsupers, dscuBufs_t *, dLUstruct_t *, gridinfo_t *) |
int | dfreeScuBufs (dscuBufs_t *scuBufs) |
int_t | dsparseTreeFactor (int_t nnodes, int_t *perm_c_supno, treeTopoInfo_t *treeTopoInfo, commRequests_t *comReqs, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, dLUValSubBuf_t *LUvsb, ddiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info) |
int_t | ddenseTreeFactor (int_t nnnodes, int_t *perm_c_supno, commRequests_t *comReqs, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, dLUValSubBuf_t *LUvsb, ddiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info) |
int_t | dsparseTreeFactor_ASYNC (sForest_t *sforest, commRequests_t **comReqss, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, dLUValSubBuf_t **LUvsbs, ddiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, HyP_t *HyP, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info) |
dLUValSubBuf_t ** | dLluBufInitArr (int_t numLA, dLUstruct_t *LUstruct) |
int | dLluBufFreeArr (int_t numLA, dLUValSubBuf_t **LUvsbs) |
ddiagFactBufs_t ** | dinitDiagFactBufsArr (int mxLeafNode, int ldt, gridinfo_t *grid) |
ddiagFactBufs_t ** | dinitDiagFactBufsArrMod (int mxLeafNode, int *ldts, gridinfo_t *grid) |
int | dfreeDiagFactBufsArr (int mxLeafNode, ddiagFactBufs_t **dFBufs) |
int | dinitDiagFactBufs (int ldt, ddiagFactBufs_t *dFBuf) |
int_t | checkRecvUDiag (int_t k, commRequests_t *comReqs, gridinfo_t *grid, SCT_t *SCT) |
int_t | checkRecvLDiag (int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *) |
int | pdflatten_LDATA (superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat) |
void | pdconvert_flatten_skyline2UROWDATA (superlu_dist_options_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, int n) |
void | pdconvertUROWDATA2skyline (superlu_dist_options_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, int n) |
int_t | dReDistribute_A (SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno, gridinfo_t *grid, int_t *colptr[], int_t *rowind[], double *a[]) |
float | pddistribute3d_Yang (superlu_dist_options_t *options, int_t n, SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
int | pdgssvx3d_csc_batch (superlu_dist_options_t *, int batchCount, int m, int n, int nnz, int nrhs, handle_t *, double **RHSptr, int *ldRHS, double **ReqPtr, double **CeqPtr, int **RpivPtr, int **CpivPtr, DiagScale_t *DiagScale, handle_t *F, double **Xptr, int *ldX, double **Berrs, gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info) |
Solve a batch of linear systems Ai * Xi = Bi with a direct method, computing the LU factorization of each matrix Ai. This is the fixed-size interface: all the input matrices have the same sparsity structure. More... | |
int | dequil_batch (superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, double **ReqPtr, double **CeqPtr, DiagScale_t *) |
Equilibrate the systems using the LAPACK-style algorithm. More... | |
int | dpivot_batch (superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, double **ReqPtr, double **CeqPtr, DiagScale_t *, int **RpivPtr) |
Compute row pivotings for each matrix, for numerical stability. More... | |
Variables | |
double * | dready_x |
double * | dready_lsum |
Distributed SuperLU data types and function prototypes.
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-- Distributed SuperLU routine (version 9.0) -- Lawrence Berkeley National Lab, Univ. of California Berkeley, Georgia Institute of Technology. November 1, 2007; April 5, 2015; September 18, 2018 (version 6.0); February 8, 2019 (version 6.1.1); May 10, 2019 (version 7.0.0)
#define MAX_LOOKAHEADS 50 |
typedef struct dlsumBmod_buff_t dlsumBmod_buff_t |
typedef struct dxT_struct dxT_struct |
int_t checkRecvLDiag | ( | int_t | k, |
commRequests_t * | comReqs, | ||
gridinfo_t * | grid, | ||
SCT_t * | SCT | ||
) |
int_t checkRecvUDiag | ( | int_t | k, |
commRequests_t * | comReqs, | ||
gridinfo_t * | grid, | ||
SCT_t * | SCT | ||
) |
void d3D_printMemUse | ( | dtrf3Dpartition_t * | trf3Dpartition, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int d_c2cpp_GetHWPM | ( | SuperMatrix * | A, |
gridinfo_t * | grid, | ||
dScalePermstruct_t * | ScalePermstruct | ||
) |
Purpose ======= Get heavy-weight perfect matching (HWPM). Reference: Arguments ========= A (input) SuperMatrix* The distributed input matrix A of dimension (A->nrow, A->ncol). The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. grid (input) gridinfo_t* SuperLU's 2D process mesh. ScalePermstruct (output) dScalePermstruct_t* ScalePermstruct->perm_r stores the permutation obtained from HWPM.
int dAllocGlu_3d | ( | int_t | n, |
int_t | nsupers, | ||
dLUstruct_t * | LUstruct | ||
) |
int_t dAllocLlu_3d | ( | int_t | nsupers, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
void dallocScalePermstruct_RC | ( | dScalePermstruct_t * | ScalePermstruct, |
int_t | m, | ||
int_t | n | ||
) |
int daxpy_ | ( | const int * | n, |
const double * | alpha, | ||
const double * | x, | ||
const int * | incx, | ||
double * | y, | ||
const int * | incy | ||
) |
int_t dBcast_LPanel | ( | int_t | k, |
int_t | k0, | ||
int_t * | lsub, | ||
double * | lusup, | ||
gridinfo_t * | , | ||
int * | msgcnt, | ||
int ** | ToSendR, | ||
int_t * | xsup, | ||
SCT_t * | , | ||
int | |||
) |
int_t dBcast_UPanel | ( | int_t | k, |
int_t | k0, | ||
int_t * | usub, | ||
double * | uval, | ||
gridinfo_t * | , | ||
int * | msgcnt, | ||
int * | ToSendD, | ||
SCT_t * | , | ||
int | |||
) |
void dbcastPermutedSparseA | ( | SuperMatrix * | A, |
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_freeable_t * | Glu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int_t dbCastXk2Pck | ( | int_t | k, |
dxT_struct * | xT_s, | ||
int | nrhs, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dblock_gemm_scatter | ( | int_t | lb, |
int_t | j, | ||
Ublock_info_t * | Ublock_info, | ||
Remain_info_t * | Remain_info, | ||
double * | L_mat, | ||
int | ldl, | ||
double * | U_mat, | ||
int | ldu, | ||
double * | bigV, | ||
int_t | knsupc, | ||
int_t | klst, | ||
int_t * | lsub, | ||
int_t * | usub, | ||
int_t | ldt, | ||
int_t | thread_id, | ||
int * | indirect, | ||
int * | indirect2, | ||
int_t ** | Lrowind_bc_ptr, | ||
double ** | Lnzval_bc_ptr, | ||
int_t ** | Ufstnz_br_ptr, | ||
double ** | Unzval_br_ptr, | ||
int_t * | xsup, | ||
gridinfo_t * | , | ||
SuperLUStat_t * | |||
) |
int_t dblock_gemm_scatterBottomLeft | ( | int_t | lb, |
int_t | j, | ||
double * | bigV, | ||
int_t | knsupc, | ||
int_t | klst, | ||
int_t * | lsub, | ||
int_t * | usub, | ||
int_t | ldt, | ||
int * | indirect, | ||
int * | indirect2, | ||
HyP_t * | HyP, | ||
dLUstruct_t * | , | ||
gridinfo_t * | , | ||
SCT_t * | SCT, | ||
SuperLUStat_t * | |||
) |
int_t dblock_gemm_scatterBottomRight | ( | int_t | lb, |
int_t | j, | ||
double * | bigV, | ||
int_t | knsupc, | ||
int_t | klst, | ||
int_t * | lsub, | ||
int_t * | usub, | ||
int_t | ldt, | ||
int * | indirect, | ||
int * | indirect2, | ||
HyP_t * | HyP, | ||
dLUstruct_t * | , | ||
gridinfo_t * | , | ||
SCT_t * | SCT, | ||
SuperLUStat_t * | |||
) |
int_t dblock_gemm_scatterTopLeft | ( | int_t | lb, |
int_t | j, | ||
double * | bigV, | ||
int_t | knsupc, | ||
int_t | klst, | ||
int_t * | lsub, | ||
int_t * | usub, | ||
int_t | ldt, | ||
int * | indirect, | ||
int * | indirect2, | ||
HyP_t * | HyP, | ||
dLUstruct_t * | , | ||
gridinfo_t * | , | ||
SCT_t * | SCT, | ||
SuperLUStat_t * | |||
) |
int_t dblock_gemm_scatterTopRight | ( | int_t | lb, |
int_t | j, | ||
double * | bigV, | ||
int_t | knsupc, | ||
int_t | klst, | ||
int_t * | lsub, | ||
int_t * | usub, | ||
int_t | ldt, | ||
int * | indirect, | ||
int * | indirect2, | ||
HyP_t * | HyP, | ||
dLUstruct_t * | , | ||
gridinfo_t * | , | ||
SCT_t * | SCT, | ||
SuperLUStat_t * | |||
) |
int_t dbroadcastAncestor3d | ( | dtrf3Dpartition_t * | trf3Dpartition, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dbsolve_Xt_bcast | ( | int_t | ilvl, |
dxT_struct * | xT_s, | ||
int | nrhs, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dClone_CompRowLoc_Matrix_dist | ( | SuperMatrix * | , |
SuperMatrix * | |||
) |
int_t dcollect3dLpanels | ( | int_t | layer, |
int_t | nsupers, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int_t dcollect3dUpanels | ( | int_t | layer, |
int_t | nsupers, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
void dCompCol_to_CompRow_dist | ( | int_t | m, |
int_t | n, | ||
int_t | nnz, | ||
double * | a, | ||
int_t * | colptr, | ||
int_t * | rowind, | ||
double ** | at, | ||
int_t ** | rowptr, | ||
int_t ** | colind | ||
) |
void dCompRow_to_CompCol_dist | ( | int_t | , |
int_t | , | ||
int_t | , | ||
double * | , | ||
int_t * | , | ||
int_t * | , | ||
double ** | , | ||
int_t ** | , | ||
int_t ** | |||
) |
double dcomputeA_Norm | ( | int | notran, |
SuperMatrix * | A, | ||
gridinfo_t * | grid | ||
) |
This function computes the norm of a matrix A.
notran | A flag which determines the norm type to be calculated. |
A | The input matrix for which the norm is computed. |
grid | The gridinfo_t object that contains the information of the grid. |
If the iam process is the root (iam=0), it prints the computed norm to the standard output.
void dComputeLevelsets | ( | int | iam, |
int_t | nsupers, | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dLocalLU_t * | Llu, | ||
int_t * | levels | ||
) |
Compute the level sets in the L factor.
void dCopy_CompCol_Matrix_dist | ( | SuperMatrix * | , |
SuperMatrix * | |||
) |
void dCopy_CompRowLoc_Matrix_dist | ( | SuperMatrix * | A, |
SuperMatrix * | B | ||
) |
int dcreate_batch_systems | ( | handle_t * | SparseMatrix_handles, |
int | batchCount, | ||
int | nrhs, | ||
double ** | rhs, | ||
int * | ldb, | ||
double ** | x, | ||
int * | ldx, | ||
FILE * | fp, | ||
char * | postfix, | ||
gridinfo3d_t * | grid3d | ||
) |
int dcreate_block_diag_3d | ( | SuperMatrix * | A, |
int | batchCount, | ||
int | nrhs, | ||
double ** | rhs, | ||
int * | ldb, | ||
double ** | x, | ||
int * | ldx, | ||
FILE * | fp, | ||
char * | postfix, | ||
gridinfo3d_t * | grid3d | ||
) |
void dCreate_CompCol_Matrix_dist | ( | SuperMatrix * | , |
int_t | , | ||
int_t | , | ||
int_t | , | ||
double * | , | ||
int_t * | , | ||
int_t * | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
void dCreate_CompRowLoc_Matrix_dist | ( | SuperMatrix * | , |
int_t | , | ||
int_t | , | ||
int_t | , | ||
int_t | , | ||
int_t | , | ||
double * | , | ||
int_t * | , | ||
int_t * | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
void dCreate_Dense_Matrix_dist | ( | SuperMatrix * | , |
int_t | , | ||
int_t | , | ||
double * | , | ||
int_t | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
int dcreate_matrix | ( | SuperMatrix * | A, |
int | nrhs, | ||
double ** | rhs, | ||
int * | ldb, | ||
double ** | x, | ||
int * | ldx, | ||
FILE * | fp, | ||
gridinfo_t * | grid | ||
) |
int dcreate_matrix3d | ( | SuperMatrix * | A, |
int | nrhs, | ||
double ** | rhs, | ||
int * | ldb, | ||
double ** | x, | ||
int * | ldx, | ||
FILE * | fp, | ||
gridinfo3d_t * | grid3d | ||
) |
int dcreate_matrix_dat | ( | SuperMatrix * | , |
int | , | ||
double ** | , | ||
int * | , | ||
double ** | , | ||
int * | , | ||
FILE * | , | ||
gridinfo_t * | |||
) |
int dcreate_matrix_postfix | ( | SuperMatrix * | A, |
int | nrhs, | ||
double ** | rhs, | ||
int * | ldb, | ||
double ** | x, | ||
int * | ldx, | ||
FILE * | fp, | ||
char * | postfix, | ||
gridinfo_t * | grid | ||
) |
int dcreate_matrix_postfix3d | ( | SuperMatrix * | A, |
int | nrhs, | ||
double ** | rhs, | ||
int * | ldb, | ||
double ** | x, | ||
int * | ldx, | ||
FILE * | fp, | ||
char * | postfix, | ||
gridinfo3d_t * | grid3d | ||
) |
int dcreate_matrix_rb | ( | SuperMatrix * | , |
int | , | ||
double ** | , | ||
int * | , | ||
double ** | , | ||
int * | , | ||
FILE * | , | ||
gridinfo_t * | |||
) |
void dCreate_SuperNode_Matrix_dist | ( | SuperMatrix * | , |
int_t | , | ||
int_t | , | ||
int_t | , | ||
double * | , | ||
int_t * | , | ||
int_t * | , | ||
int_t * | , | ||
int_t * | , | ||
int_t * | , | ||
Stype_t | , | ||
Dtype_t | , | ||
Mtype_t | |||
) |
int dDeAllocGlu_3d | ( | dLUstruct_t * | LUstruct | ) |
int dDeAllocLlu_3d | ( | int_t | n, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
void ddelete_multiGPU_buffers | ( | ) |
int_t ddenseTreeFactor | ( | int_t | nnnodes, |
int_t * | perm_c_supno, | ||
commRequests_t * | comReqs, | ||
dscuBufs_t * | scuBufs, | ||
packLUInfo_t * | packLUInfo, | ||
msgs_t * | msgs, | ||
dLUValSubBuf_t * | LUvsb, | ||
ddiagFactBufs_t * | dFBuf, | ||
factStat_t * | factStat, | ||
factNodelists_t * | fNlists, | ||
superlu_dist_options_t * | options, | ||
int_t * | gIperm_c_supno, | ||
int_t | ldt, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat, | ||
double | thresh, | ||
SCT_t * | SCT, | ||
int | tag_ub, | ||
int * | info | ||
) |
void dDestroy_A3d_gathered_on_2d | ( | dSOLVEstruct_t * | SOLVEstruct, |
gridinfo3d_t * | grid3d | ||
) |
void dDestroy_LU | ( | int_t | n, |
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct | ||
) |
Destroy distributed L & U matrices.
void dDestroy_Tree | ( | int_t | n, |
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct | ||
) |
Destroy broadcast and reduction trees used in triangular solve.
void dDestroy_trf3Dpartition | ( | dtrf3Dpartition_t * | trf3Dpartition | ) |
int_t dDiagFactIBCast | ( | int_t | k, |
int_t | k0, | ||
double * | BlockUFactor, | ||
double * | BlockLFactor, | ||
int * | IrecvPlcd_D, | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
superlu_dist_options_t * | , | ||
double | thresh, | ||
dLUstruct_t * | LUstruct, | ||
SuperLUStat_t * | , | ||
int * | info, | ||
SCT_t * | , | ||
int | tag_ub | ||
) |
float ddist_psymbtonum | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Pslu_freeable_t * | Pslu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid | ||
) |
Purpose ======= Distribute the input matrix onto the 2D process mesh. Arguments ========= options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition and triangular solve are performed. options->Fact specifies whether or not the L and U structures will be re-used: = SamePattern_SameRowPerm: L and U structures are input, and unchanged on exit. This routine should not be called for this case, an error is generated. Instead, pddistribute routine should be called. = DOFACT or SamePattern: L and U structures are computed and output. n (Input) int Dimension of the matrix. A (Input) SuperMatrix* The distributed input matrix A of dimension (A->nrow, A->ncol). A may be overwritten by diag(R)*A*diag(C)*Pc^T. The type of A can be: Stype = NR; Dtype = SLU_D; Mtype = GE. ScalePermstruct (Input) dScalePermstruct_t* The data structure to store the scaling and permutation vectors describing the transformations performed to the original matrix A. Glu_freeable (Input) *Glu_freeable_t The global structure describing the graph of L and U. LUstruct (Input) dLUstruct_t* Data structures for L and U factors. grid (Input) gridinfo_t* The 2D process mesh. Return value ============ < 0, number of bytes allocated on return from the dist_symbLU > 0, number of bytes allocated for performing the distribution of the data, when out of memory. (an approximation).
Purpose ======= Distribute the input matrix onto the 2D process mesh. Arguments ========= options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition and triangular solve are performed. options->Fact specifies whether or not the L and U structures will be re-used: = SamePattern_SameRowPerm: L and U structures are input, and unchanged on exit. This routine should not be called for this case, an error is generated. Instead, pddistribute routine should be called. = DOFACT or SamePattern: L and U structures are computed and output. n (Input) int Dimension of the matrix. A (Input) SuperMatrix* The distributed input matrix A of dimension (A->nrow, A->ncol). A may be overwritten by diag(R)*A*diag(C)*Pc^T. The type of A can be: Stype = NR; Dtype = SLU_D; Mtype = GE. ScalePermstruct (Input) dScalePermstruct_t* The data structure to store the scaling and permutation vectors describing the transformations performed to the original matrix A. Glu_freeable (Input) *Glu_freeable_t The global structure describing the graph of L and U. LUstruct (Input) dLUstruct_t* Data structures for L and U factors. grid (Input) gridinfo_t* The 2D process mesh. Return value ============ < 0, number of bytes allocated on return > 0, number of bytes allocated for performing the distribution of the data, when out of memory. (an approximation).
float ddistribute | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
Glu_freeable_t * | Glu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid | ||
) |
Purpose ======= Distribute the matrix onto the 2D process mesh. Arguments ========= options (input) superlu_dist_options_t * options->Fact specifies whether or not the L and U structures will be re-used. = SamePattern_SameRowPerm: L and U structures are input, and unchanged on exit. = DOFACT or SamePattern: L and U structures are computed and output. n (input) int Dimension of the matrix. A (input) SuperMatrix* The original matrix A, permuted by columns, of dimension (A->nrow, A->ncol). The type of A can be: Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. LUstruct (input) dLUstruct_t* Data structures for L and U factors. grid (input) gridinfo_t* The 2D process mesh. Return value ============ > 0, working storage (in bytes) required to perform redistribution. (excluding LU factor size)
int dequil_batch | ( | superlu_dist_options_t * | options, |
int | batchCount, | ||
int | m, | ||
int | n, | ||
handle_t * | SparseMatrix_handles, | ||
double ** | ReqPtr, | ||
double ** | CeqPtr, | ||
DiagScale_t * | DiagScale | ||
) |
Equilibrate the systems using the LAPACK-style algorithm.
[in] | options | solver options |
[in] | batchCount | number of matrices in the batch |
[in] | m | row dimension of the matrices |
[in] | n | column dimension of the matrices |
[in,out] | SparseMatrix_handles | pointers to the matrices in the batch, each pointing to the actual storage in CSC format. On entry, the original matrices. On exit, each matrix may be overwritten by diag(R)*A*diag(C) |
[out] | ReqPtr | pointers to row scaling vectors (allocated internally) |
[out] | CeqPtr | pointers to column scaling vectors (allocated internally) |
[in,out] | DiagScale | arrays indicating how each system is equilibrated: {ROW, COL, BOTH} |
Return value i: = 0: successful exit; > 0: indicates the first matrix in the batch has a zero row or column; if i <= m, the i-th row of A is exactly zero; if i > m, the (i-m)-th column of A is exactly zero.
void dfill_dist | ( | double * | a, |
int_t | alen, | ||
double | dval | ||
) |
Fills a double precision array with a given value.
void dFillRHS_dist | ( | char * | trans, |
int_t | nrhs, | ||
double * | x, | ||
int_t | ldx, | ||
SuperMatrix * | A, | ||
double * | rhs, | ||
int_t | ldb | ||
) |
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's.
int dfreeDiagFactBufsArr | ( | int | mxLeafNode, |
ddiagFactBufs_t ** | dFBufs | ||
) |
int dfreeScuBufs | ( | dscuBufs_t * | scuBufs | ) |
int_t dfsolveReduceLsum3d | ( | int_t | treeId, |
int_t | sender, | ||
int_t | receiver, | ||
double * | lsum, | ||
double * | recvbuf, | ||
int | nrhs, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dgather_l | ( | int_t | num_LBlk, |
int_t | knsupc, | ||
Remain_info_t * | L_info, | ||
double * | lval, | ||
int_t | LD_lval, | ||
double * | L_buff | ||
) |
void dgather_u | ( | int_t | num_u_blks, |
Ublock_info_t * | Ublock_info, | ||
int_t * | usub, | ||
double * | uval, | ||
double * | bigU, | ||
int_t | ldu, | ||
int_t * | xsup, | ||
int_t | klst | ||
) |
int_t dgatherAllFactoredLU | ( | dtrf3Dpartition_t * | trf3Dpartition, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dgatherAllFactoredLUFr | ( | int_t * | myZeroTrIdxs, |
sForest_t * | sForests, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dgatherFactoredLU | ( | int_t | sender, |
int_t | receiver, | ||
int_t | nnodes, | ||
int_t * | nodeList, | ||
dLUValSubBuf_t * | LUvsb, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
void dGatherNRformat_loc3d | ( | fact_t | Fact, |
NRformat_loc * | A, | ||
double * | B, | ||
int | ldb, | ||
int | nrhs, | ||
gridinfo3d_t * | grid3d, | ||
NRformat_loc3d ** | |||
) |
void dGatherNRformat_loc3d_allgrid | ( | fact_t | Fact, |
NRformat_loc * | A, | ||
double * | B, | ||
int | ldb, | ||
int | nrhs, | ||
gridinfo3d_t * | grid3d, | ||
NRformat_loc3d ** | |||
) |
int dgemm_ | ( | const char * | , |
const char * | , | ||
const int * | , | ||
const int * | , | ||
const int * | , | ||
const double * | , | ||
const double * | , | ||
const int * | , | ||
const double * | , | ||
const int * | , | ||
const double * | , | ||
double * | , | ||
const int * | |||
) |
void dgemv_ | ( | const char * | , |
const int * | , | ||
const int * | , | ||
const double * | , | ||
const double * | a, | ||
const int * | , | ||
const double * | , | ||
const int * | , | ||
const double * | , | ||
double * | , | ||
const int * | |||
) |
void dGenCOOLblocks | ( | int | iam, |
int_t | nsupers, | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dLocalLU_t * | Llu, | ||
int_t ** | cooRows, | ||
int_t ** | cooCols, | ||
double ** | cooVals, | ||
int_t * | n, | ||
int_t * | nnzL | ||
) |
Dump the factored matrix L using Matlab triplet format.
void dGenCSCLblocks | ( | int | iam, |
int_t | nsupers, | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dLocalLU_t * | Llu, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr, | ||
int_t * | n, | ||
int_t * | nnzL | ||
) |
Dump the factored matrix L using CSC format.
void dGenCSRLblocks | ( | int | iam, |
int_t | nsupers, | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dLocalLU_t * | Llu, | ||
double ** | nzval, | ||
int_t ** | colind, | ||
int_t ** | rowptr, | ||
int_t * | n, | ||
int_t * | nnzL | ||
) |
Dump the factored matrix L using CSR format.
void dger_ | ( | const int * | , |
const int * | , | ||
const double * | , | ||
const double * | , | ||
const int * | , | ||
const double * | , | ||
const int * | , | ||
double * | , | ||
const int * | |||
) |
double * dgetBigU | ( | superlu_dist_options_t * | , |
int_t | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | |||
) |
void dgsequ_dist | ( | SuperMatrix * | A, |
double * | r, | ||
double * | c, | ||
double * | rowcnd, | ||
double * | colcnd, | ||
double * | amax, | ||
int * | info | ||
) |
Purpose ======= DGSEQU_DIST computes row and column scalings intended to equilibrate an M-by-N sparse matrix A and reduce its condition number. R returns the row scale factors and C the column scale factors, chosen to try to make the largest element in each row and column of the matrix B with elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. R(i) and C(j) are restricted to be between SMLNUM = smallest safe number and BIGNUM = largest safe number. Use of these scaling factors is not guaranteed to reduce the condition number of A but works well in practice. See supermatrix.h for the definition of 'SuperMatrix' structure. Arguments ========= A (input) SuperMatrix* The matrix of dimension (A->nrow, A->ncol) whose equilibration factors are to be computed. The type of A can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. R (output) double*, size A->nrow If INFO = 0 or INFO > M, R contains the row scale factors for A. C (output) double*, size A->ncol If INFO = 0, C contains the column scale factors for A. ROWCND (output) double* If INFO = 0 or INFO > M, ROWCND contains the ratio of the smallest R(i) to the largest R(i). If ROWCND >= 0.1 and AMAX is neither too large nor too small, it is not worth scaling by R. COLCND (output) double* If INFO = 0, COLCND contains the ratio of the smallest C(i) to the largest C(i). If COLCND >= 0.1, it is not worth scaling by C. AMAX (output) double* Absolute value of largest matrix element. If AMAX is very close to overflow or very close to underflow, the matrix should be scaled. INFO (output) int* = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= A->nrow: the i-th row of A is exactly zero > A->nrow: the (i-M)-th column of A is exactly zero =====================================================================
void dgstrf2 | ( | int_t | k, |
double * | diagBlk, | ||
int_t | LDA, | ||
double * | BlockUfactor, | ||
int_t | LDU, | ||
double | thresh, | ||
int_t * | xsup, | ||
superlu_dist_options_t * | options, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
int_t dIBcast_LPanel | ( | int_t | k, |
int_t | k0, | ||
int_t * | lsub, | ||
double * | lusup, | ||
gridinfo_t * | , | ||
int * | msgcnt, | ||
MPI_Request * | , | ||
int ** | ToSendR, | ||
int_t * | xsup, | ||
int | |||
) |
int_t dIBcast_UPanel | ( | int_t | k, |
int_t | k0, | ||
int_t * | usub, | ||
double * | uval, | ||
gridinfo_t * | , | ||
int * | msgcnt, | ||
MPI_Request * | , | ||
int * | ToSendD, | ||
int | |||
) |
int_t dIBcastRecvLPanel | ( | int_t | k, |
int_t | k0, | ||
int * | msgcnt, | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
int_t * | Lsub_buf, | ||
double * | Lval_buf, | ||
int * | factored, | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SCT_t * | , | ||
int | tag_ub | ||
) |
int_t dIBcastRecvUPanel | ( | int_t | k, |
int_t | k0, | ||
int * | msgcnt, | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
int_t * | Usub_buf, | ||
double * | Uval_buf, | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SCT_t * | , | ||
int | tag_ub | ||
) |
int_t diBcastXk2Pck | ( | int_t | k, |
double * | x, | ||
int | nrhs, | ||
int ** | sendList, | ||
MPI_Request * | send_req, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dinf_norm_error_dist | ( | int_t | n, |
int_t | nrhs, | ||
double * | x, | ||
int_t | ldx, | ||
double * | xtrue, | ||
int_t | ldxtrue, | ||
gridinfo_t * | grid | ||
) |
Check the inf-norm of the error vector.
int_t dinit3DLUstruct | ( | int_t * | myTreeIdxs, |
int_t * | myZeroTrIdxs, | ||
int_t * | nodeCount, | ||
int_t ** | nodeList, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
void dinit3DLUstructForest | ( | int_t * | myTreeIdxs, |
int_t * | myZeroTrIdxs, | ||
sForest_t ** | sForests, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
void dInit_HyP | ( | superlu_dist_options_t * | , |
HyP_t * | HyP, | ||
dLocalLU_t * | Llu, | ||
int_t | mcb, | ||
int_t | mrb | ||
) |
int dinitDiagFactBufs | ( | int | ldt, |
ddiagFactBufs_t * | dFBuf | ||
) |
ddiagFactBufs_t ** dinitDiagFactBufsArr | ( | int | mxLeafNode, |
int | ldt, | ||
gridinfo_t * | grid | ||
) |
ddiagFactBufs_t ** dinitDiagFactBufsArrMod | ( | int | mxLeafNode, |
int * | ldts, | ||
gridinfo_t * | grid | ||
) |
int_t dinitLsumBmod_buff | ( | int_t | ns, |
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf | ||
) |
int_t dinitScuBufs | ( | superlu_dist_options_t * | , |
int_t | ldt, | ||
int_t | num_threads, | ||
int_t | nsupers, | ||
dscuBufs_t * | , | ||
dLUstruct_t * | , | ||
gridinfo_t * | |||
) |
dtrf3Dpartition_t * dinitTrf3Dpartition | ( | int_t | nsupers, |
superlu_dist_options_t * | options, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
dtrf3Dpartition_t * dinitTrf3Dpartition_allgrid | ( | int_t | n, |
superlu_dist_options_t * | options, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
dtrf3Dpartition_t * dinitTrf3DpartitionLUstructgrid0 | ( | int_t | n, |
superlu_dist_options_t * | options, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int_t dIRecv_LDiagBlock | ( | int_t | k0, |
double * | L_blk_ptr, | ||
int_t | size, | ||
int_t | src, | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
SCT_t * | , | ||
int | |||
) |
int_t dIrecv_LPanel | ( | int_t | k, |
int_t | k0, | ||
int_t * | Lsub_buf, | ||
double * | Lval_buf, | ||
gridinfo_t * | , | ||
MPI_Request * | , | ||
dLocalLU_t * | , | ||
int | |||
) |
int_t dIRecv_UDiagBlock | ( | int_t | k0, |
double * | ublk_ptr, | ||
int_t | size, | ||
int_t | src, | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
SCT_t * | , | ||
int | |||
) |
int_t dIrecv_UPanel | ( | int_t | k, |
int_t | k0, | ||
int_t * | Usub_buf, | ||
double * | , | ||
dLocalLU_t * | , | ||
gridinfo_t * | , | ||
MPI_Request * | , | ||
int | |||
) |
int_t dISend_LDiagBlock | ( | int_t | k0, |
double * | lblk_ptr, | ||
int_t | size, | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
int | |||
) |
int_t dISend_UDiagBlock | ( | int_t | k0, |
double * | ublk_ptr, | ||
int_t | size, | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
int | |||
) |
double dlangs_dist | ( | char * | norm, |
SuperMatrix * | A | ||
) |
Purpose ======= DLANGS_DIST returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real matrix A. Description =========== DLANGS_DIST returns the value DLANGS_DIST = max(abs(A(i,j))) if NORM = 'M' or 'm'; norm1(A) if NORM = '1', 'O' or 'o'; normI(A) if NORM = 'I' or 'i'; normF(A) if NORM = 'F', 'f', 'E' or 'e'; where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) char* Specifies the value to be returned in DLANGS_DIST as described above. A (input) SuperMatrix* The M by N sparse matrix A. =====================================================================
void dlaqgs_dist | ( | SuperMatrix * | A, |
double * | r, | ||
double * | c, | ||
double | rowcnd, | ||
double | colcnd, | ||
double | amax, | ||
char * | equed | ||
) |
Purpose ======= DLAQGS_DIST equilibrates a general sparse M by N matrix A using the row and column scaling factors in the vectors R and C. See supermatrix.h for the definition of 'SuperMatrix' structure. Arguments ========= A (input/output) SuperMatrix* On exit, the equilibrated matrix. See EQUED for the form of the equilibrated matrix. The type of A can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. R (input) double*, dimension (A->nrow) The row scale factors for A. C (input) double*, dimension (A->ncol) The column scale factors for A. ROWCND (input) double Ratio of the smallest R(i) to the largest R(i). COLCND (input) double Ratio of the smallest C(i) to the largest C(i). AMAX (input) double Absolute value of largest matrix entry. EQUED (output) char* Specifies the form of equilibration that was done. = 'N': No equilibration = 'R': Row equilibration, i.e., A has been premultiplied by diag(R). = 'C': Column equilibration, i.e., A has been postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A has been replaced by diag(R) * A * diag(C). Internal Parameters =================== THRESH is a threshold value used to decide if row or column scaling should be done based on the ratio of the row or column scaling factors. If ROWCND < THRESH, row scaling is done, and if COLCND < THRESH, column scaling is done. LARGE and SMALL are threshold values used to decide if row scaling should be done based on the absolute size of the largest matrix element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. =====================================================================
int_t dlasum_bmod_Tree | ( | int_t | pTree, |
int_t | cTree, | ||
double * | lsum, | ||
double * | x, | ||
dxT_struct * | xT_s, | ||
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf, | ||
dLUstruct_t * | LUstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat | ||
) |
int dldperm_dist | ( | int | job, |
int | n, | ||
int_t | nnz, | ||
int_t | colptr[], | ||
int_t | adjncy[], | ||
double | nzval[], | ||
int_t * | perm, | ||
double | u[], | ||
double | v[] | ||
) |
Purpose ======= DLDPERM finds a row permutation so that the matrix has large entries on the diagonal. Arguments ========= job (input) int Control the action. Possible values for JOB are: = 1 : Compute a row permutation of the matrix so that the permuted matrix has as many entries on its diagonal as possible. The values on the diagonal are of arbitrary size. HSL subroutine MC21A/AD is used for this. = 2 : Compute a row permutation of the matrix so that the smallest value on the diagonal of the permuted matrix is maximized. = 3 : Compute a row permutation of the matrix so that the smallest value on the diagonal of the permuted matrix is maximized. The algorithm differs from the one used for JOB = 2 and may have quite a different performance. = 4 : Compute a row permutation of the matrix so that the sum of the diagonal entries of the permuted matrix is maximized. = 5 : Compute a row permutation of the matrix so that the product of the diagonal entries of the permuted matrix is maximized and vectors to scale the matrix so that the nonzero diagonal entries of the permuted matrix are one in absolute value and all the off-diagonal entries are less than or equal to one in absolute value. Restriction: 1 <= JOB <= 5. n (input) int The order of the matrix. nnz (input) int The number of nonzeros in the matrix. adjncy (input) int*, of size nnz The adjacency structure of the matrix, which contains the row indices of the nonzeros. colptr (input) int*, of size n+1 The pointers to the beginning of each column in ADJNCY. nzval (input) double*, of size nnz The nonzero values of the matrix. nzval[k] is the value of the entry corresponding to adjncy[k]. It is not used if job = 1. perm (output) int*, of size n The permutation vector. perm[i] = j means row i in the original matrix is in row j of the permuted matrix. u (output) double*, of size n If job = 5, the natural logarithms of the row scaling factors. 
v (output) double*, of size n If job = 5, the natural logarithms of the column scaling factors. The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j).
int_t dleafForestBackSolve3d | ( | superlu_dist_options_t * | options, |
int_t | treeId, | ||
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x, | ||
double * | lsum, | ||
double * | recvbuf, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
int_t dleafForestForwardSolve3d | ( | superlu_dist_options_t * | options, |
int_t | treeId, | ||
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x, | ||
double * | lsum, | ||
double * | recvbuf, | ||
double * | rtemp, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
int dLluBufFreeArr | ( | int_t | numLA, |
dLUValSubBuf_t ** | LUvsbs | ||
) |
int_t dLluBufInit | ( | dLUValSubBuf_t * | , |
dLUstruct_t * | |||
) |
dLUValSubBuf_t ** dLluBufInitArr | ( | int_t | numLA, |
dLUstruct_t * | LUstruct | ||
) |
int_t dlocalSolveXkYk | ( | trtype_t | trtype, |
int_t | k, | ||
double * | x, | ||
int | nrhs, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat | ||
) |
int_t dLPanelTrSolve | ( | int_t | k, |
int * | factored_L, | ||
double * | BlockUFactor, | ||
gridinfo_t * | , | ||
dLUstruct_t * | |||
) |
int_t dLPanelUpdate | ( | int_t | k, |
int * | IrecvPlcd_D, | ||
int * | factored_L, | ||
MPI_Request * | , | ||
double * | BlockUFactor, | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SCT_t * | |||
) |
int_t dLpanelUpdate | ( | int_t | off0, |
int_t | nsupc, | ||
double * | ublk_ptr, | ||
int_t | ld_ujrow, | ||
double * | lusup, | ||
int_t | nsupr, | ||
SCT_t * | |||
) |
void dlsum_bmod | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
int | nrhs, | ||
int_t | k, | ||
int * | bmod, | ||
int_t * | Urbs, | ||
Ucb_indptr_t ** | Ucb_indptr, | ||
int_t ** | Ucb_valptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request | send_req[], | ||
SuperLUStat_t * | stat | ||
) |
void dlsum_bmod_GG | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf, | ||
int_t | k, | ||
int * | bmod, | ||
int_t * | Urbs, | ||
Ucb_indptr_t ** | Ucb_indptr, | ||
int_t ** | Ucb_valptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request | send_req[], | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dlsum_bmod_GG_newsolve | ( | dtrf3Dpartition_t * | trf3Dpartition, |
double * | lsum, | ||
double * | x, | ||
double * | xk, | ||
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf, | ||
int_t | k, | ||
int * | bmod, | ||
int_t * | Urbs, | ||
Ucb_indptr_t ** | Ucb_indptr, | ||
int_t ** | Ucb_valptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request | send_req[], | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dlsum_bmod_inv | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int_t | k, | ||
int * | bmod, | ||
int_t * | Urbs, | ||
Ucb_indptr_t ** | Ucb_indptr, | ||
int_t ** | Ucb_valptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
SuperLUStat_t ** | stat, | ||
int_t * | root_send, | ||
int_t * | nroot_send, | ||
int_t | sizelsum, | ||
int_t | sizertemp, | ||
int | thread_id, | ||
int | num_thread | ||
) |
void dlsum_bmod_inv_gpu_wrap | ( | superlu_dist_options_t * | , |
int | , | ||
int | , | ||
int | , | ||
int | , | ||
double * | , | ||
double * | , | ||
int | , | ||
int | , | ||
int_t | , | ||
int * | , | ||
C_Tree * | , | ||
C_Tree * | , | ||
int_t * | , | ||
int_t * | , | ||
int64_t * | , | ||
int_t * | , | ||
int64_t * | , | ||
int_t * | , | ||
int64_t * | , | ||
double * | , | ||
int64_t * | , | ||
double * | , | ||
int64_t * | , | ||
double * | , | ||
int64_t * | , | ||
int_t * | , | ||
int64_t * | , | ||
int_t * | , | ||
gridinfo_t * | , | ||
int_t | , | ||
uint64_t * | , | ||
uint64_t * | , | ||
double * | , | ||
double * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int | |||
) |
void dlsum_bmod_inv_master | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int_t | k, | ||
int * | bmod, | ||
int_t * | Urbs, | ||
Ucb_indptr_t ** | Ucb_indptr, | ||
int_t ** | Ucb_valptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
SuperLUStat_t ** | stat, | ||
int_t | sizelsum, | ||
int_t | sizertemp, | ||
int | thread_id, | ||
int | num_thread | ||
) |
void dlsum_fmod | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int | knsupc, | ||
int_t | k, | ||
int * | fmod, | ||
int_t | nlb, | ||
int_t | lptr, | ||
int_t | luptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request | send_req[], | ||
SuperLUStat_t * | stat | ||
) |
Purpose ======= Perform local block modifications: lsum[i] -= L_i,k * X[k].
void dlsum_fmod_inv | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int_t | k, | ||
int * | fmod, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
SuperLUStat_t ** | stat, | ||
int_t * | leaf_send, | ||
int_t * | nleaf_send, | ||
int_t | sizelsum, | ||
int_t | sizertemp, | ||
int_t | recurlevel, | ||
int_t | maxsuper, | ||
int | thread_id, | ||
int | num_thread | ||
) |
Purpose ======= Perform local block modifications: lsum[i] -= L_i,k * X[k].
void dlsum_fmod_inv_gpu_wrap | ( | int | , |
int | , | ||
int | , | ||
int | , | ||
double * | , | ||
double * | , | ||
int | , | ||
int | , | ||
int_t | , | ||
int * | fmod, | ||
C_Tree * | , | ||
C_Tree * | , | ||
int_t * | , | ||
int_t * | , | ||
int64_t * | , | ||
double * | , | ||
int64_t * | , | ||
double * | , | ||
int64_t * | , | ||
int_t * | , | ||
int64_t * | , | ||
int_t * | , | ||
int * | , | ||
gridinfo_t * | , | ||
int_t | , | ||
uint64_t * | , | ||
uint64_t * | , | ||
double * | , | ||
double * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int * | , | ||
int | |||
) |
void dlsum_fmod_inv_master | ( | double * | lsum, |
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int | knsupc, | ||
int_t | k, | ||
int * | fmod, | ||
int_t | nlb, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
SuperLUStat_t ** | stat, | ||
int_t | sizelsum, | ||
int_t | sizertemp, | ||
int_t | recurlevel, | ||
int_t | maxsuper, | ||
int | thread_id, | ||
int | num_thread | ||
) |
Purpose ======= Perform local block modifications: lsum[i] -= L_i,k * X[k].
void dlsum_fmod_leaf | ( | int_t | treeId, |
dtrf3Dpartition_t * | trf3Dpartition, | ||
double * | lsum, | ||
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int | knsupc, | ||
int_t | k, | ||
int * | fmod, | ||
int_t | nlb, | ||
int_t | lptr, | ||
int_t | luptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request | send_req[], | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dlsum_fmod_leaf_newsolve | ( | dtrf3Dpartition_t * | trf3Dpartition, |
double * | lsum, | ||
double * | x, | ||
double * | xk, | ||
double * | rtemp, | ||
int | nrhs, | ||
int | knsupc, | ||
int_t | k, | ||
int * | fmod, | ||
int_t | nlb, | ||
int_t | lptr, | ||
int_t | luptr, | ||
int_t * | xsup, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request | send_req[], | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
int_t dlsumForestBsolve | ( | int_t | k, |
int_t | treeId, | ||
double * | lsum, | ||
double * | x, | ||
dxT_struct * | xT_s, | ||
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf, | ||
dLUstruct_t * | LUstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat | ||
) |
int_t dlsumReducePrK | ( | int_t | k, |
double * | x, | ||
double * | lsum, | ||
double * | recvbuf, | ||
int | nrhs, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dLUstructFree | ( | dLUstruct_t * | LUstruct | ) |
Deallocate LUstruct.
void dLUstructInit | ( | const | int_t, |
dLUstruct_t * | LUstruct | ||
) |
Allocate storage in LUstruct.
double dMaxAbsLij | ( | int | iam, |
int | n, | ||
Glu_persist_t * | Glu_persist, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid | ||
) |
Find max(abs(L(i,j)))
double dMaxAbsUij | ( | int | iam, |
int | n, | ||
Glu_persist_t * | Glu_persist, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid | ||
) |
Find max(abs(U(i,j)))
void dnewTrfPartitionInit | ( | int_t | nsupers, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int_t dnonLeafForestBackSolve3d | ( | int_t | treeId, |
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x, | ||
double * | lsum, | ||
dxT_struct * | xT_s, | ||
double * | recvbuf, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dlsumBmod_buff_t * | lbmod_buf, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
Pkk(Yk) = sumOver_PrK (Yk)
int_t dnonLeafForestForwardSolve3d | ( | int_t | treeId, |
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x, | ||
double * | lsum, | ||
dxT_struct * | xT_s, | ||
double * | recvbuf, | ||
double * | rtemp, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
Pkk(Yk) = sumOver_PrK (Yk)
double * doubleCalloc_dist | ( | int_t | n | ) |
double * doubleMalloc_dist | ( | int_t | n | ) |
int_t dp2pSolvedX3d | ( | int_t | treeId, |
int_t | sender, | ||
int_t | receiver, | ||
double * | x, | ||
int | nrhs, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
int_t dp3dCollect | ( | int_t | layer, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int_t dp3dScatter | ( | int_t | n, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
int * | supernodeMask | ||
) |
int_t dPackLBlock | ( | int_t | k, |
double * | Dest, | ||
Glu_persist_t * | , | ||
gridinfo_t * | , | ||
dLocalLU_t * | |||
) |
void dperform_row_permutation | ( | superlu_dist_options_t * | options, |
fact_t | Fact, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dLUstruct_t * | LUstruct, | ||
int_t | m, | ||
int_t | n, | ||
gridinfo_t * | grid, | ||
SuperMatrix * | A, | ||
SuperMatrix * | GA, | ||
SuperLUStat_t * | stat, | ||
int | job, | ||
int | Equil, | ||
int * | rowequ, | ||
int * | colequ, | ||
int * | iinfo | ||
) |
int dpivot_batch | ( | superlu_dist_options_t * | options, |
int | batchCount, | ||
int | m, | ||
int | n, | ||
handle_t * | SparseMatrix_handles, | ||
double ** | ReqPtr, | ||
double ** | CeqPtr, | ||
DiagScale_t * | DiagScale, | ||
int ** | RpivPtr | ||
) |
Compute row pivotings for each matrix, for numerical stability.
[in] | options | solver options |
[in] | batchCount | number of matrices in the batch |
[in] | m | row dimension of the matrices |
[in] | n | column dimension of the matrices |
[in,out] | SparseMatrix_handles | pointers to the matrices in the batch, each pointing to the actual storage in CSC format. On entry, the original matrices, which may be overwritten by A1 <- diag(R)*A*diag(C) from dequil_batch(). On exit, each matrix may be A2 <- Pr*A1. |
[in,out] | ReqPtr | pointers to row scaling vectors, may be overwritten by scaling from MC64 |
[in,out] | CeqPtr | pointers to column scaling vectors, may be overwritten by scaling from MC64 |
[in,out] | DiagScale | array indicating how each system is equilibrated: {ROW, COL, BOTH} |
[in,out] | RpivPtr | pointers to row permutation vectors for each matrix, each of size m. On exit, each RpivPtr[] is applied to each matrix. Return value: 0: success; -1: invalid RowPerm option, an identity perm_r[] is returned; d: indicates that the d-th matrix is the first one in the batch encountering an error. |
void dprepare_multiGPU_buffers | ( | int | , |
int | , | ||
int | , | ||
int | , | ||
int | , | ||
int | |||
) |
void dPrint_CompCol_Matrix_dist | ( | SuperMatrix * | ) |
int dPrint_CompRowLoc_Matrix_dist | ( | SuperMatrix * | ) |
void dPrint_Dense_Matrix_dist | ( | SuperMatrix * | ) |
void dPrintLblocks | ( | int | iam, |
int_t | nsupers, | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dLocalLU_t * | Llu | ||
) |
Print the blocks in the factored matrix L.
void dPrintUblocks | ( | int | iam, |
int_t | nsupers, | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dLocalLU_t * | Llu | ||
) |
Print the blocks in the factored matrix U.
int_t dQuerySpace_dist | ( | int_t | n, |
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat, | ||
superlu_dist_mem_usage_t * | mem_usage | ||
) |
mem_usage consists of the following fields:
int dread_binary | ( | FILE * | fp, |
int_t * | m, | ||
int_t * | n, | ||
int_t * | nnz, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr | ||
) |
void dreadhb_dist | ( | int | iam, |
FILE * | fp, | ||
int_t * | nrow, | ||
int_t * | ncol, | ||
int_t * | nonz, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr | ||
) |
Purpose ======= Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format as described below. Line 1 (A72,A8) Col. 1 - 72 Title (TITLE) Col. 73 - 80 Key (KEY) Line 2 (5I14) Col. 1 - 14 Total number of lines excluding header (TOTCRD) Col. 15 - 28 Number of lines for pointers (PTRCRD) Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) Col. 43 - 56 Number of lines for numerical values (VALCRD) Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) (including starting guesses and solution vectors if present) (zero indicates no right-hand side data is present) Line 3 (A3, 11X, 4I14) Col. 1 - 3 Matrix type (see below) (MXTYPE) Col. 15 - 28 Number of rows (or variables) (NROW) Col. 29 - 42 Number of columns (or elements) (NCOL) Col. 43 - 56 Number of row (or variable) indices (NNZERO) (equal to number of entries for assembled matrices) Col. 57 - 70 Number of elemental matrix entries (NELTVL) (zero in the case of assembled matrices) Line 4 (2A16, 2A20) Col. 1 - 16 Format for pointers (PTRFMT) Col. 17 - 32 Format for row (or variable) indices (INDFMT) Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present Col. 1 Right-hand side type: F for full storage or M for same format as matrix Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) Col. 3 X if an exact solution vector(s) is supplied. Col. 15 - 28 Number of right-hand sides (NRHS) Col. 29 - 42 Number of row indices (NRHSIX) (ignored in case of unassembled matrices) The three character type field on line 3 describes the matrix type. The following table lists the permitted values for each of the three characters. As an example of the type field, RSA denotes that the matrix is real, symmetric, and assembled. 
First Character: R Real matrix C Complex matrix P Pattern only (no numerical values supplied) Second Character: S Symmetric U Unsymmetric H Hermitian Z Skew symmetric R Rectangular Third Character: A Assembled E Elemental matrices (unassembled)
void dreadMM_dist | ( | FILE * | fp, |
int_t * | m, | ||
int_t * | n, | ||
int_t * | nonz, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr | ||
) |
brief
Output parameters ================= (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of nonzeros in columns of matrix A; (*nzval)[*] the numerical values; column i of A is given by (*nzval)[k], k = (*colptr)[i],..., (*colptr)[i+1]-1.
void dreadrb_dist | ( | int | iam, |
FILE * | fp, | ||
int_t * | nrow, | ||
int_t * | ncol, | ||
int_t * | nonz, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr | ||
) |
void dreadtriple_dist | ( | FILE * | fp, |
int_t * | m, | ||
int_t * | n, | ||
int_t * | nonz, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr | ||
) |
brief
Output parameters ================= (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of nonzeros in columns of matrix A; (*nzval)[*] the numerical values; column i of A is given by (*nzval)[k], k = (*colptr)[i],..., (*colptr)[i+1]-1.
void dreadtriple_noheader | ( | FILE * | fp, |
int_t * | m, | ||
int_t * | n, | ||
int_t * | nonz, | ||
double ** | nzval, | ||
int_t ** | rowind, | ||
int_t ** | colptr | ||
) |
brief
Output parameters ================= (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of nonzeros in columns of matrix A; (*nzval)[*] the numerical values; column i of A is given by (*nzval)[k], k = (*colptr)[i],..., (*colptr)[i+1]-1.
int_t dRecv_UDiagBlock | ( | int_t | k0, |
double * | ublk_ptr, | ||
int_t | size, | ||
int_t | src, | ||
gridinfo_t * | , | ||
SCT_t * | , | ||
int | |||
) |
int_t dReDistribute_A | ( | SuperMatrix * | A, |
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_freeable_t * | Glu_freeable, | ||
int_t * | xsup, | ||
int_t * | supno, | ||
gridinfo_t * | grid, | ||
int_t * | colptr[], | ||
int_t * | rowind[], | ||
double * | a[] | ||
) |
Purpose ======= Re-distribute A on the 2D process mesh. Arguments ========= A (input) SuperMatrix* The distributed input matrix A of dimension (A->nrow, A->ncol). A may be overwritten by diag(R)*A*diag(C)*Pc^T. The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. ScalePermstruct (input) dScalePermstruct_t* The data structure to store the scaling and permutation vectors describing the transformations performed on the original matrix A. Glu_freeable (input) Glu_freeable_t* The global structure describing the graph of L and U. grid (input) gridinfo_t* The 2D process mesh. colptr (output) int* rowind (output) int* a (output) double* Return value ============ > 0, working storage (in bytes) required to perform redistribution (excluding LU factor size).
int dreduceAllAncestors3d | ( | int_t | ilvl, |
int_t * | myNodeCount, | ||
int_t ** | treePerm, | ||
dLUValSubBuf_t * | LUvsb, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dreduceAncestors3d | ( | int_t | sender, |
int_t | receiver, | ||
int_t | nnodes, | ||
int_t * | nodeList, | ||
double * | Lval_buf, | ||
double * | Uval_buf, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dreduceSolvedX_newsolve | ( | int_t | treeId, |
int_t | sender, | ||
int_t | receiver, | ||
double * | x, | ||
int | nrhs, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
double * | recvbuf, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void dRgather_L | ( | int_t | k, |
int_t * | lsub, | ||
double * | lusup, | ||
gEtreeInfo_t * | , | ||
Glu_persist_t * | , | ||
gridinfo_t * | , | ||
HyP_t * | , | ||
int_t * | myIperm, | ||
int_t * | iperm_c_supno | ||
) |
void dRgather_U | ( | int_t | k, |
int_t | jj0, | ||
int_t * | usub, | ||
double * | uval, | ||
double * | bigU, | ||
gEtreeInfo_t * | , | ||
Glu_persist_t * | , | ||
gridinfo_t * | , | ||
HyP_t * | , | ||
int_t * | myIperm, | ||
int_t * | iperm_c_supno, | ||
int_t * | perm_u | ||
) |
int dscal_ | ( | const int * | n, |
const double * | alpha, | ||
double * | dx, | ||
const int * | incx | ||
) |
void dScaleAdd_CompRowLoc_Matrix_dist | ( | SuperMatrix * | A, |
SuperMatrix * | B, | ||
double | c | ||
) |
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,j} + B_{i,j} for i,j=1,...,n.
void dScaleAddId_CompRowLoc_Matrix_dist | ( | SuperMatrix * | A, |
double | c | ||
) |
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,j} for i,j=1,...,n and \delta_{i,j} is the Kronecker delta.
void dscaleMatrixDiagonally | ( | fact_t | Fact, |
dScalePermstruct_t * | ScalePermstruct, | ||
SuperMatrix * | A, | ||
SuperLUStat_t * | stat, | ||
gridinfo_t * | grid, | ||
int * | rowequ, | ||
int * | colequ, | ||
int * | iinfo | ||
) |
void dScalePermstructFree | ( | dScalePermstruct_t * | ScalePermstruct | ) |
Deallocate ScalePermstruct.
void dScalePermstructInit | ( | const | int_t, |
const | int_t, | ||
dScalePermstruct_t * | ScalePermstruct | ||
) |
Allocate storage in ScalePermstruct.
int_t dscatter3dLPanels | ( | int_t | nsupers, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
int * | supernodeMask | ||
) |
int_t dscatter3dUPanels | ( | int_t | nsupers, |
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
int * | supernodeMask | ||
) |
int dScatter_B3d | ( | NRformat_loc3d * | A3d, |
gridinfo3d_t * | grid3d | ||
) |
void dscatter_l | ( | int | ib, |
int | ljb, | ||
int | nsupc, | ||
int_t | iukp, | ||
int_t * | xsup, | ||
int | klst, | ||
int | nbrow, | ||
int_t | lptr, | ||
int | temp_nbrow, | ||
int_t * | usub, | ||
int_t * | lsub, | ||
double * | tempv, | ||
int * | indirect_thread, | ||
int * | indirect2, | ||
int_t ** | Lrowind_bc_ptr, | ||
double ** | Lnzval_bc_ptr, | ||
gridinfo_t * | grid | ||
) |
void dscatter_u | ( | int | ib, |
int | jb, | ||
int | nsupc, | ||
int_t | iukp, | ||
int_t * | xsup, | ||
int | klst, | ||
int | nbrow, | ||
int_t | lptr, | ||
int | temp_nbrow, | ||
int_t * | lsub, | ||
int_t * | usub, | ||
double * | tempv, | ||
int_t ** | Ufstnz_br_ptr, | ||
double ** | Unzval_br_ptr, | ||
gridinfo_t * | grid | ||
) |
int_t dSchurComplementSetup | ( | int_t | k, |
int * | msgcnt, | ||
Ublock_info_t * | , | ||
Remain_info_t * | , | ||
uPanelInfo_t * | , | ||
lPanelInfo_t * | , | ||
int_t * | , | ||
int_t * | , | ||
int_t * | , | ||
double * | bigU, | ||
int_t * | Lsub_buf, | ||
double * | Lval_buf, | ||
int_t * | Usub_buf, | ||
double * | Uval_buf, | ||
gridinfo_t * | , | ||
dLUstruct_t * | |||
) |
int_t dSchurComplementSetupGPU | ( | int_t | k, |
msgs_t * | msgs, | ||
packLUInfo_t * | , | ||
int_t * | , | ||
int_t * | , | ||
int_t * | , | ||
gEtreeInfo_t * | , | ||
factNodelists_t * | , | ||
dscuBufs_t * | , | ||
dLUValSubBuf_t * | LUvsb, | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
HyP_t * | |||
) |
void dSolveFinalize | ( | superlu_dist_options_t * | options, |
dSOLVEstruct_t * | SOLVEstruct | ||
) |
Release the resources used for the solution phase.
int dSolveInit | ( | superlu_dist_options_t * | options, |
SuperMatrix * | A, | ||
int_t | perm_r[], | ||
int_t | perm_c[], | ||
int_t | nrhs, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
dSOLVEstruct_t * | SOLVEstruct | ||
) |
Initialize the data structure for the solution phase.
int_t dsparseTreeFactor | ( | int_t | nnodes, |
int_t * | perm_c_supno, | ||
treeTopoInfo_t * | treeTopoInfo, | ||
commRequests_t * | comReqs, | ||
dscuBufs_t * | scuBufs, | ||
packLUInfo_t * | packLUInfo, | ||
msgs_t * | msgs, | ||
dLUValSubBuf_t * | LUvsb, | ||
ddiagFactBufs_t * | dFBuf, | ||
factStat_t * | factStat, | ||
factNodelists_t * | fNlists, | ||
superlu_dist_options_t * | options, | ||
int_t * | gIperm_c_supno, | ||
int_t | ldt, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat, | ||
double | thresh, | ||
SCT_t * | SCT, | ||
int * | info | ||
) |
int_t dsparseTreeFactor_ASYNC | ( | sForest_t * | sforest, |
commRequests_t ** | comReqss, | ||
dscuBufs_t * | scuBufs, | ||
packLUInfo_t * | packLUInfo, | ||
msgs_t ** | msgss, | ||
dLUValSubBuf_t ** | LUvsbs, | ||
ddiagFactBufs_t ** | dFBufs, | ||
factStat_t * | factStat, | ||
factNodelists_t * | fNlists, | ||
gEtreeInfo_t * | gEtreeInfo, | ||
superlu_dist_options_t * | options, | ||
int_t * | gIperm_c_supno, | ||
int_t | ldt, | ||
HyP_t * | HyP, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat, | ||
double | thresh, | ||
SCT_t * | SCT, | ||
int | tag_ub, | ||
int * | info | ||
) |
int dstatic_schedule | ( | superlu_dist_options_t * | options, |
int | m, | ||
int | n, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat, | ||
int_t * | perm_c_supno, | ||
int_t * | iperm_c_supno, | ||
int * | info | ||
) |
int_t dTrs2_GatherTrsmScatter | ( | int_t | klst, |
int_t | iukp, | ||
int_t | rukp, | ||
int_t * | usub, | ||
double * | uval, | ||
double * | tempv, | ||
int_t | knsupc, | ||
int | nsupr, | ||
double * | lusup, | ||
Glu_persist_t * | Glu_persist | ||
) |
int_t dTrs2_GatherU | ( | int_t | iukp, |
int_t | rukp, | ||
int_t | klst, | ||
int_t | nsupc, | ||
int_t | ldu, | ||
int_t * | usub, | ||
double * | uval, | ||
double * | tempv | ||
) |
int_t dTrs2_ScatterU | ( | int_t | iukp, |
int_t | rukp, | ||
int_t | klst, | ||
int_t | nsupc, | ||
int_t | ldu, | ||
int_t * | usub, | ||
double * | uval, | ||
double * | tempv | ||
) |
int_t dtrs_B_init3d | ( | int_t | nsupers, |
double * | x, | ||
int | nrhs, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
int dtrs_compute_communication_structure | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
int * | supernodeMask, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat | ||
) |
int_t dtrs_X_gather3d | ( | double * | x, |
int | nrhs, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
int dtrsm_ | ( | const char * | , |
const char * | , | ||
const char * | , | ||
const char * | , | ||
const int * | , | ||
const int * | , | ||
const double * | , | ||
const double * | , | ||
const int * | , | ||
double * | , | ||
const int * | |||
) |
int dtrsv_ | ( | char * | , |
char * | , | ||
char * | , | ||
int * | , | ||
double * | , | ||
int * | , | ||
double * | , | ||
int * | |||
) |
void dtrtri_ | ( | char * | , |
char * | , | ||
int * | , | ||
double * | , | ||
int * | , | ||
int * | |||
) |
int_t dUDiagBlockRecvWait | ( | int_t | k, |
int * | IrecvPlcd_D, | ||
int * | factored_L, | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SCT_t * | |||
) |
int_t dUPanelTrSolve | ( | int_t | k, |
double * | BlockLFactor, | ||
double * | bigV, | ||
int_t | ldt, | ||
Ublock_info_t * | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SuperLUStat_t * | , | ||
SCT_t * | |||
) |
int_t dUPanelUpdate | ( | int_t | k, |
int * | factored_U, | ||
MPI_Request * | , | ||
double * | BlockLFactor, | ||
double * | bigV, | ||
int_t | ldt, | ||
Ublock_info_t * | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SuperLUStat_t * | , | ||
SCT_t * | |||
) |
int_t dWait_LRecv | ( | MPI_Request * | , |
int * | msgcnt, | ||
int * | msgcntsU, | ||
gridinfo_t * | , | ||
SCT_t * | |||
) |
int_t dWaitL | ( | int_t | k, |
int * | msgcnt, | ||
int * | msgcntU, | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SCT_t * | |||
) |
int_t dWaitU | ( | int_t | k, |
int * | msgcnt, | ||
MPI_Request * | , | ||
MPI_Request * | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SCT_t * | |||
) |
void dZero_CompRowLoc_Matrix_dist | ( | SuperMatrix * | A | ) |
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,...,n.
void dZeroLblocks | ( | int | iam, |
int | n, | ||
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct | ||
) |
Sets all entries of matrix L to zero.
int_t dzeroSetLU | ( | int_t | nnodes, |
int_t * | nodeList, | ||
dLUstruct_t * | , | ||
gridinfo3d_t * | |||
) |
void dZeroUblocks | ( | int | iam, |
int | n, | ||
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct | ||
) |
Sets all entries of matrix U to zero.
int_t dzRecvLPanel | ( | int_t | k, |
int_t | sender, | ||
double | alpha, | ||
double | beta, | ||
double * | Lval_buf, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dzRecvUPanel | ( | int_t | k, |
int_t | sender, | ||
double | alpha, | ||
double | beta, | ||
double * | Uval_buf, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dzSendLPanel | ( | int_t | k, |
int_t | receiver, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int_t dzSendUPanel | ( | int_t | k, |
int_t | receiver, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SCT_t * | SCT | ||
) |
int file_dPrint_CompRowLoc_Matrix_dist | ( | FILE * | fp, |
SuperMatrix * | A | ||
) |
int file_Printdouble5 | ( | FILE * | fp, |
char * | name, | ||
int_t | len, | ||
double * | x | ||
) |
void Free_HyP | ( | HyP_t * | HyP | ) |
int freePackLUInfo | ( | packLUInfo_t * | packLUInfo | ) |
int getNsupers | ( | int | n, |
Glu_persist_t * | Glu_persist | ||
) |
int_t initPackLUInfo | ( | int_t | nsupers, |
packLUInfo_t * | packLUInfo | ||
) |
void Local_Dgstrf2 | ( | superlu_dist_options_t * | options, |
int_t | k, | ||
double | thresh, | ||
double * | BlockUFactor, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
SuperLUStat_t * | stat, | ||
int * | info, | ||
SCT_t * | SCT | ||
) |
void nv_init_wrapper | ( | MPI_Comm | ) |
int pdCompRow_loc_to_CompCol_global | ( | int_t | need_value, |
SuperMatrix * | A, | ||
gridinfo_t * | grid, | ||
SuperMatrix * | GA | ||
) |
Gather A from the distributed compressed row format to global A in compressed column format.
void pdCompute_Diag_Inv | ( | int_t | n, |
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= Compute the inverse of the diagonal blocks of the L and U triangular matrices.
void pdconvert_flatten_skyline2UROWDATA | ( | superlu_dist_options_t * | options, |
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct, | ||
SuperLUStat_t * | stat, | ||
int | n | ||
) |
void pdconvertU | ( | superlu_dist_options_t * | , |
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
SuperLUStat_t * | , | ||
int | |||
) |
void pdconvertUROWDATA2skyline | ( | superlu_dist_options_t * | options, |
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct, | ||
SuperLUStat_t * | stat, | ||
int | n | ||
) |
float pddistribute | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_freeable_t * | Glu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid | ||
) |
float pddistribute3d_Yang | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_freeable_t * | Glu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d | ||
) |
float pddistribute_allgrid | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_freeable_t * | Glu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
int * | supernodeMask | ||
) |
float pddistribute_allgrid_index_only | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_freeable_t * | Glu_freeable, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
int * | supernodeMask | ||
) |
int pdflatten_LDATA | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat | ||
) |
void pdGetDiagU | ( | int_t | n, |
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
double * | diagU | ||
) |
Purpose ======= GetDiagU extracts the main diagonal of matrix U of the LU factorization. Arguments ========= n (input) int Dimension of the matrix. LUstruct (input) dLUstruct_t* The data structures to store the distributed L and U factors. see superlu_ddefs.h for its definition. grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. diagU (output) double*, dimension (n) The main diagonal of matrix U. On exit, it is available on all processes. Note ==== The diagonal blocks of the L and U matrices are stored in the L data structures, and are on the diagonal processes of the 2D process grid. This routine is modified from gather_diag_to_all() in pdgstrs_Bglobal.c.
void pdgsequ | ( | SuperMatrix * | A, |
double * | r, | ||
double * | c, | ||
double * | rowcnd, | ||
double * | colcnd, | ||
double * | amax, | ||
int * | info, | ||
gridinfo_t * | grid | ||
) |
Purpose ======= PDGSEQU computes row and column scalings intended to equilibrate an M-by-N sparse matrix A and reduce its condition number. R returns the row scale factors and C the column scale factors, chosen to try to make the largest element in each row and column of the matrix B with elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. R(i) and C(j) are restricted to be between SMLNUM = smallest safe number and BIGNUM = largest safe number. Use of these scaling factors is not guaranteed to reduce the condition number of A but works well in practice. See supermatrix.h for the definition of 'SuperMatrix' structure. Arguments ========= A (input) SuperMatrix* The matrix of dimension (A->nrow, A->ncol) whose equilibration factors are to be computed. The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. R (output) double*, size A->nrow If INFO = 0 or INFO > M, R contains the row scale factors for A. C (output) double*, size A->ncol If INFO = 0, C contains the column scale factors for A. ROWCND (output) double* If INFO = 0 or INFO > M, ROWCND contains the ratio of the smallest R(i) to the largest R(i). If ROWCND >= 0.1 and AMAX is neither too large nor too small, it is not worth scaling by R. COLCND (output) double* If INFO = 0, COLCND contains the ratio of the smallest C(i) to the largest C(i). If COLCND >= 0.1, it is not worth scaling by C. AMAX (output) double* Absolute value of largest matrix element. If AMAX is very close to overflow or very close to underflow, the matrix should be scaled. INFO (output) int* = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= M: the i-th row of A is exactly zero > M: the (i-M)-th column of A is exactly zero GRID (input) gridinfo_t* The 2D process mesh. =====================================================================
void pdgsmv | ( | int_t | abs, |
SuperMatrix * | A_internal, | ||
gridinfo_t * | grid, | ||
pdgsmv_comm_t * | gsmv_comm, | ||
double | x[], | ||
double | ax[] | ||
) |
int pdgsmv_AXglobal | ( | int_t | m, |
int_t | update[], | ||
double | val[], | ||
int_t | bindx[], | ||
double | X[], | ||
double | ax[] | ||
) |
Performs sparse matrix-vector multiplication.
int pdgsmv_AXglobal_abs | ( | int_t | m, |
int_t | update[], | ||
double | val[], | ||
int_t | bindx[], | ||
double | X[], | ||
double | ax[] | ||
) |
int pdgsmv_AXglobal_setup | ( | SuperMatrix * | , |
Glu_persist_t * | , | ||
gridinfo_t * | , | ||
int_t * | , | ||
int_t * | [], | ||
double * | [], | ||
int_t * | [], | ||
int_t | [] | ||
) |
void pdgsmv_finalize | ( | pdgsmv_comm_t * | gsmv_comm | ) |
void pdgsmv_init | ( | SuperMatrix * | A, |
int_t * | row_to_proc, | ||
gridinfo_t * | grid, | ||
pdgsmv_comm_t * | gsmv_comm | ||
) |
void pdgsrfs | ( | superlu_dist_options_t * | , |
int_t | , | ||
SuperMatrix * | , | ||
double | , | ||
dLUstruct_t * | , | ||
dScalePermstruct_t * | , | ||
gridinfo_t * | , | ||
double | [], | ||
int_t | , | ||
double | [], | ||
int_t | , | ||
int | , | ||
dSOLVEstruct_t * | , | ||
double * | , | ||
SuperLUStat_t * | , | ||
int * | |||
) |
void pdgsrfs3d | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
double | anorm, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
gridinfo3d_t * | grid3d, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
double * | B, | ||
int_t | ldb, | ||
double * | X, | ||
int_t | ldx, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
double * | berr, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= PDGSRFS3D improves the computed solution to a system of linear equations and provides error bounds and backward error estimates for the solution. Arguments ========= options (input) superlu_dist_options_t* (global) The structure defines the input parameters to control how the LU decomposition and triangular solve are performed. n (input) int (global) The order of the system of linear equations. A (input) SuperMatrix* The original matrix A, or the scaled A if equilibration was done. A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. anorm (input) double The norm of the original matrix A, or the scaled A if equilibration was done. LUstruct (input) dLUstruct_t* The distributed data structures storing L and U factors. The L and U factors are obtained from pdgstrf for the possibly scaled and permuted matrix A. See superlu_ddefs.h for the definition of 'dLUstruct_t'. ScalePermstruct (input) dScalePermstruct_t* (global) The data structure to store the scaling and permutation vectors describing the transformations performed to the matrix A. grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. Grid can be initialized by subroutine SUPERLU_GRIDINIT. See superlu_defs.h for the definition of 'gridinfo_t'. B (input) double* (local) The m_loc-by-NRHS right-hand side matrix of the possibly equilibrated system. That is, B may be overwritten by diag(R)*B. ldb (input) int (local) Leading dimension of matrix B. X (input/output) double* (local) On entry, the solution matrix Y, as computed by PDGSTRS, of the transformed system A1*Y = Pc*Pr*B. where A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X. On exit, the improved solution matrix Y. 
In order to obtain the solution X to the original system, Y should be permuted by Pc^T, and premultiplied by diag(C) if DiagScale = COL or BOTH. This must be done after this routine is called. ldx (input) int (local) Leading dimension of matrix X. nrhs (input) int Number of right-hand sides. SOLVEstruct (output) dSOLVEstruct_t* (global) Contains the information for the communication during the solution phase. berr (output) double*, dimension (nrhs) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). stat (output) SuperLUStat_t* Record the statistics about the refinement steps. See util.h for the definition of SuperLUStat_t. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement.
void pdgsrfs_ABXglobal | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
SuperMatrix * | A, | ||
double | anorm, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
double * | B, | ||
int_t | ldb, | ||
double * | X, | ||
int_t | ldx, | ||
int | nrhs, | ||
double * | berr, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= pdgsrfs_ABXglobal improves the computed solution to a system of linear equations and provides error bounds and backward error estimates for the solution. Arguments ========= n (input) int (global) The order of the system of linear equations. A (input) SuperMatrix* The original matrix A, or the scaled A if equilibration was done. A is also permuted into the form Pc*Pr*A*Pc', where Pr and Pc are permutation matrices. The type of A can be: Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. NOTE: Currently, A must reside in all processes when calling this routine. anorm (input) double The norm of the original matrix A, or the scaled A if equilibration was done. LUstruct (input) dLUstruct_t* The distributed data structures storing L and U factors. The L and U factors are obtained from pdgstrf for the possibly scaled and permuted matrix A. See superlu_ddefs.h for the definition of 'dLUstruct_t'. grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. Grid can be initialized by subroutine SUPERLU_GRIDINIT. See superlu_ddefs.h for the definition of 'gridinfo_t'. B (input) double* (global) The N-by-NRHS right-hand side matrix of the possibly equilibrated and row permuted system. NOTE: Currently, B must reside on all processes when calling this routine. ldb (input) int (global) Leading dimension of matrix B. X (input/output) double* (global) On entry, the solution matrix X, as computed by PDGSTRS. On exit, the improved solution matrix X. If DiagScale = COL or BOTH, X should be premultiplied by diag(C) in order to obtain the solution to the original system. NOTE: Currently, X must reside on all processes when calling this routine. ldx (input) int (global) Leading dimension of matrix X. nrhs (input) int Number of right-hand sides. 
berr (output) double*, dimension (nrhs) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). stat (output) SuperLUStat_t* Record the statistics about the refinement steps. See util.h for the definition of SuperLUStat_t. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement.
void pdgssvx | ( | superlu_dist_options_t * | , |
SuperMatrix * | , | ||
dScalePermstruct_t * | , | ||
double * | , | ||
int | , | ||
int | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
dSOLVEstruct_t * | , | ||
double * | , | ||
SuperLUStat_t * | , | ||
int * | |||
) |
void pdgssvx3d | ( | superlu_dist_options_t * | options, |
SuperMatrix * | A, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
double | B[], | ||
int | ldb, | ||
int | nrhs, | ||
gridinfo3d_t * | grid3d, | ||
dLUstruct_t * | LUstruct, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
double * | berr, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
int pdgssvx3d_csc_batch | ( | superlu_dist_options_t * | options, |
int | batchCount, | ||
int | m, | ||
int | n, | ||
int | nnz, | ||
int | nrhs, | ||
handle_t * | SparseMatrix_handles, | ||
double ** | RHSptr, | ||
int * | ldRHS, | ||
double ** | ReqPtr, | ||
double ** | CeqPtr, | ||
int ** | RpivPtr, | ||
int ** | CpivPtr, | ||
DiagScale_t * | DiagScale, | ||
handle_t * | F, | ||
double ** | Xptr, | ||
int * | ldX, | ||
double ** | Berrs, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Solve a batch of linear systems Ai * Xi = Bi with a direct method, computing the LU factorization of each matrix Ai.
This is the fixed-size interface: all the input matrices have the same sparsity structure.
[in] | options | solver options |
[in] | batchCount | number of matrices in the batch |
[in] | m | row dimension of the matrices |
[in] | n | column dimension of the matrices |
[in] | nnz | number of non-zero entries in each matrix |
[in] | nrhs | number of right-hand-sides |
[in,out] | SparseMatrix_handles | array of sparse matrix handles, of size 'batchCount', each pointing to the actual storage in CSC format; see 'NCformat' in the SuperMatrix structure. Each A is overwritten by row/col scaling R*A*C |
[in,out] | RHSptr | array of pointers to dense storage of right-hand sides B Each B is overwritten by row/col scaling R*B*C |
[in] | ldRHS | array of leading dimensions of RHS |
[in,out] | ReqPtr | array of pointers to diagonal row scaling vectors R, each of size m. ReqPtr[] are allocated internally if equilibration is asked for |
[in,out] | CeqPtr | array of pointers to diagonal column scaling vectors C, each of size n. CeqPtr[] are allocated internally if equilibration is asked for |
[in,out] | RpivPtr | array of pointers to row permutation vectors, each of size m |
[in,out] | CpivPtr | array of pointers to column permutation vectors, each of size n |
[in,out] | DiagScale | array of indicators how equilibration is done for each matrix |
[out] | F | array of handles pointing to the factored matrices |
[out] | Xptr | array of pointers to dense storage of solution |
[in] | ldX | array of leading dimensions of X |
[out] | Berrs | array of pointers to backward errors |
[in] | grid3d | 3D process grid, which contains the MPI communicator |
[out] | stat | records algorithm statistics such as runtime, memory usage, etc. |
[out] | info | flags the errors on return |
!!! CHECK SETTING: TO BE SURE TO USE GPU VERSIONS !!!! gpu3dVersion superlu_acc_offload
void pdgssvx_ABglobal | ( | superlu_dist_options_t * | , |
SuperMatrix * | , | ||
dScalePermstruct_t * | , | ||
double * | , | ||
int | , | ||
int | , | ||
gridinfo_t * | , | ||
dLUstruct_t * | , | ||
double * | , | ||
SuperLUStat_t * | , | ||
int * | |||
) |
int_t pdgsTrBackSolve3d | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x3d, | ||
double * | lsum3d, | ||
dxT_struct * | xT_s, | ||
double * | recvbuf, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
Loop over all the levels from root to leaf
Adding dlsumBmod_buff_t* lbmod_buf
int_t pdgsTrBackSolve3d_newsolve | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x3d, | ||
double * | lsum3d, | ||
double * | recvbuf, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
Adding dlsumBmod_buff_t* lbmod_buf
int_t pdgstrf | ( | superlu_dist_options_t * | options, |
int | m, | ||
int | n, | ||
double | anorm, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= PDGSTRF performs the LU factorization in parallel. Arguments ========= options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition will be performed. The following field should be defined: o ReplaceTinyPivot (yes_no_t) = NO: do not modify pivots = YES: replace tiny pivots by sqrt(epsilon)*norm(A) during LU factorization. m (input) int Number of rows in the matrix. n (input) int Number of columns in the matrix. anorm (input) double The norm of the original matrix A, or the scaled A if equilibration was done. LUstruct (input/output) dLUstruct_t* The data structures to store the distributed L and U factors. The following fields should be defined: o Glu_persist (input) Glu_persist_t* Global data structure (xsup, supno) replicated on all processes, describing the supernode partition in the factored matrices L and U: xsup[s] is the leading column of the s-th supernode, supno[i] is the supernode number to which column i belongs. o Llu (input/output) dLocalLU_t* The distributed data structures to store L and U factors. See superlu_ddefs.h for the definition of 'dLocalLU_t'. grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. Grid can be initialized by subroutine SUPERLU_GRIDINIT. See superlu_ddefs.h for the definition of 'gridinfo_t'. stat (output) SuperLUStat_t* Record the statistics on runtime and floating-point operation count. See util.h for the definition of 'SuperLUStat_t'. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
void pdgstrf2 | ( | superlu_dist_options_t * | , |
int_t | nsupers, | ||
int_t | k0, | ||
int_t | k, | ||
double | thresh, | ||
Glu_persist_t * | , | ||
gridinfo_t * | , | ||
dLocalLU_t * | , | ||
MPI_Request * | , | ||
int | , | ||
SuperLUStat_t * | , | ||
int * | |||
) |
void pdgstrf2_trsm | ( | superlu_dist_options_t * | options, |
int_t | k0, | ||
int_t | k, | ||
double | thresh, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
MPI_Request * | U_diag_blk_send_req, | ||
int | tag_ub, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= Panel factorization -- block column k Factor diagonal and subdiagonal blocks and test for exact singularity. Only the column processes that own block column *k* participate in the work. Arguments ========= options (input) superlu_dist_options_t* (global) The structure defines the input parameters to control how the LU decomposition will be performed. k0 (input) int (global) Counter of the next supernode to be factorized. k (input) int (global) The column number of the block column to be factorized. thresh (input) double (global) The threshold value = s_eps * anorm. Glu_persist (input) Glu_persist_t* Global data structures (xsup, supno) replicated on all processes. grid (input) gridinfo_t* The 2D process mesh. Llu (input/output) dLocalLU_t* Local data structures to store distributed L and U matrices. U_diag_blk_send_req (input/output) MPI_Request* List of send requests to send down the diagonal block of U. tag_ub (input) int Upper bound of MPI tag values. stat (output) SuperLUStat_t* Record the statistics about the factorization. See SuperLUStat_t structure defined in util.h. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.
ALWAYS SEND TO ALL OTHERS - TO FIX
ALWAYS SEND TO ALL OTHERS - TO FIX
int_t pdgstrf3d | ( | superlu_dist_options_t * | options, |
int | m, | ||
int | n, | ||
double | anorm, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
SCT_t * | SCT, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo3d_t * | grid3d, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
int_t pdgsTrForwardSolve3d | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x3d, | ||
double * | lsum3d, | ||
dxT_struct * | xT_s, | ||
double * | recvbuf, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
Loop over all the levels from root to leaf
int_t pdgsTrForwardSolve3d_newsolve | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | x3d, | ||
double * | lsum3d, | ||
double * | recvbuf, | ||
MPI_Request * | send_req, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
xtrsTimer_t * | xtrsTimer | ||
) |
void pdgstrs | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
gridinfo_t * | grid, | ||
double * | B, | ||
int_t | m_loc, | ||
int_t | fst_row, | ||
int_t | ldb, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= PDGSTRS solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization computed by PDGSTRF. If the equilibration, and row and column permutations were performed, the LU factorization was performed for A1 where A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U and the linear system solved is A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and the permutation to B1 by Pc*Pr is applied internally in this routine. Arguments ========= options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition and triangular solve are performed. n (input) int (global) The order of the system of linear equations. LUstruct (input) dLUstruct_t* The distributed data structures storing L and U factors. The L and U factors are obtained from PDGSTRF for the possibly scaled and permuted matrix A. See superlu_ddefs.h for the definition of 'dLUstruct_t'. A may be scaled and permuted into A1, so that A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. Grid can be initialized by subroutine SUPERLU_GRIDINIT. See superlu_defs.h for the definition of 'gridinfo_t'. B (input/output) double* On entry, the distributed right-hand side matrix of the possibly equilibrated system. That is, B may be overwritten by diag(R)*B. On exit, the distributed solution matrix Y of the possibly equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X, and X is the solution of the original system. m_loc (input) int (local) The local row dimension of matrix B. fst_row (input) int (global) The row number of B's first row in the global matrix. ldb (input) int (local) The leading dimension of matrix B. nrhs (input) int (global) Number of right-hand sides. 
SOLVEstruct (input) dSOLVEstruct_t* (global) Contains the information for the communication during the solution phase. stat (output) SuperLUStat_t* Record the statistics about the triangular solves. See util.h for the definition of 'SuperLUStat_t'. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value
void pdgstrs2 | ( | int_t | m, |
int_t | k0, | ||
int_t | k, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
SuperLUStat_t * | stat | ||
) |
void pdgstrs2_omp | ( | int_t | k0, |
int_t | k, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo_t * | grid, | ||
dLocalLU_t * | Llu, | ||
Ublock_info_t * | Ublock_info, | ||
SuperLUStat_t * | stat | ||
) |
4/19/2019
4/19/2019
void pdgstrs3d | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | B, | ||
int_t | m_loc, | ||
int_t | fst_row, | ||
int_t | ldb, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose PDGSTRS solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization computed by PDGSTRF. If the equilibration, and row and column permutations were performed, the LU factorization was performed for A1 where A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U and the linear system solved is A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and the permutation to B1 by Pc*Pr is applied internally in this routine. Arguments n (input) int (global) The order of the system of linear equations. LUstruct (input) dLUstruct_t* The distributed data structures storing L and U factors. The L and U factors are obtained from PDGSTRF for the possibly scaled and permuted matrix A. See superlu_ddefs.h for the definition of 'dLUstruct_t'. A may be scaled and permuted into A1, so that A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. Grid can be initialized by subroutine SUPERLU_GRIDINIT. See superlu_defs.h for the definition of 'gridinfo_t'. B (input/output) double* On entry, the distributed right-hand side matrix of the possibly equilibrated system. That is, B may be overwritten by diag(R)*B. On exit, the distributed solution matrix Y of the possibly equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X, and X is the solution of the original system. m_loc (input) int (local) The local row dimension of matrix B. fst_row (input) int (global) The row number of B's first row in the global matrix. ldb (input) int (local) The leading dimension of matrix B. nrhs (input) int (global) Number of right-hand sides. SOLVEstruct (input) dSOLVEstruct_t* (global) Contains the information for the communication during the solution phase. stat (output) SuperLUStat_t* Record the statistics about the triangular solves. 
See util.h for the definition of 'SuperLUStat_t'. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value
Initializing xT
Setup the headers for xT
Reduce the Solve flops from all the grids to grid zero
void pdgstrs3d_newsolve | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
dtrf3Dpartition_t * | trf3Dpartition, | ||
gridinfo3d_t * | grid3d, | ||
double * | B, | ||
int_t | m_loc, | ||
int_t | fst_row, | ||
int_t | ldb, | ||
int | nrhs, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Reduce the Solve flops from all the grids to grid zero
void pdgstrs_Bglobal | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
dLUstruct_t * | LUstruct, | ||
gridinfo_t * | grid, | ||
double * | B, | ||
int_t | ldb, | ||
int | nrhs, | ||
SuperLUStat_t * | stat, | ||
int * | info | ||
) |
Purpose ======= pdgstrs_Bglobal solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization computed by pdgstrf. Arguments ========= options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition and triangular solve are performed. n (input) int (global) The order of the system of linear equations. LUstruct (input) dLUstruct_t* The distributed data structures storing L and U factors. The L and U factors are obtained from pdgstrf for the possibly scaled and permuted matrix A. See superlu_ddefs.h for the definition of 'dLUstruct_t'. grid (input) gridinfo_t* The 2D process mesh. It contains the MPI communicator, the number of process rows (NPROW), the number of process columns (NPCOL), and my process rank. It is an input argument to all the parallel routines. Grid can be initialized by subroutine SUPERLU_GRIDINIT. See superlu_ddefs.h for the definition of 'gridinfo_t'. B (input/output) double* On entry, the right-hand side matrix of the possibly equilibrated and row permuted system. On exit, the solution matrix of the possibly equilibrated and row permuted system if info = 0; NOTE: Currently, the N-by-NRHS matrix B must reside on all processes when calling this routine. ldb (input) int (global) Leading dimension of matrix B. nrhs (input) int (global) Number of right-hand sides. stat (output) SuperLUStat_t* Record the statistics about the triangular solves. See util.h for the definition of 'SuperLUStat_t'. info (output) int* = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value
int_t pdgstrs_delete_device_lsum_x | ( | dSOLVEstruct_t * | SOLVEstruct | ) |
int_t pdgstrs_init | ( | int_t | n, |
int_t | m_loc, | ||
int_t | nrhs, | ||
int_t | fst_row, | ||
int_t | perm_r[], | ||
int_t | perm_c[], | ||
gridinfo_t * | grid, | ||
Glu_persist_t * | Glu_persist, | ||
dSOLVEstruct_t * | SOLVEstruct | ||
) |
Purpose ======= Set up the communication pattern for redistribution between B and X in the triangular solution. Arguments ========= n (input) int (global) The dimension of the linear system. m_loc (input) int (local) The local row dimension of the distributed input matrix. nrhs (input) int (global) Number of right-hand sides. fst_row (input) int (global) The row number of matrix B's first row in the global matrix. perm_r (input) int* (global) The row permutation vector. perm_c (input) int* (global) The column permutation vector. grid (input) gridinfo_t* The 2D process mesh.
int_t pdgstrs_init_device_lsum_x | ( | superlu_dist_options_t * | options, |
int_t | n, | ||
int_t | m_loc, | ||
int_t | nrhs, | ||
gridinfo_t * | grid, | ||
dLUstruct_t * | LUstruct, | ||
dSOLVEstruct_t * | SOLVEstruct, | ||
int * | supernodeMask | ||
) |
void pdinf_norm_error | ( | int | iam, |
int_t | n, | ||
int_t | nrhs, | ||
double | x[], | ||
int_t | ldx, | ||
double | xtrue[], | ||
int_t | ldxtrue, | ||
MPI_Comm | slucomm | ||
) |
Check the inf-norm of the error vector.
double pdlangs | ( | char * | norm, |
SuperMatrix * | A, | ||
gridinfo_t * | grid | ||
) |
Purpose ======= PDLANGS returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real matrix A. Description =========== PDLANGS returns the value PDLANGS = max(abs(A(i,j))) if NORM = 'M' or 'm'; norm1(A) if NORM = '1', 'O' or 'o'; normI(A) if NORM = 'I' or 'i'; normF(A) if NORM = 'F', 'f', 'E' or 'e'; where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) char* Specifies the value to be returned by PDLANGS as described above. A (input) SuperMatrix* The M by N sparse matrix A. GRID (input) gridinfo_t* The 2D process mesh. =====================================================================
void pdlaqgs | ( | SuperMatrix * | A, |
double * | r, | ||
double * | c, | ||
double | rowcnd, | ||
double | colcnd, | ||
double | amax, | ||
char * | equed | ||
) |
Purpose ======= PDLAQGS equilibrates a general sparse M by N matrix A using the row and column scaling factors in the vectors R and C. See supermatrix.h for the definition of 'SuperMatrix' structure. Arguments ========= A (input/output) SuperMatrix* On exit, the equilibrated matrix. See EQUED for the form of the equilibrated matrix. The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. R (input) double*, dimension (A->nrow) The row scale factors for A. C (input) double*, dimension (A->ncol) The column scale factors for A. ROWCND (input) double Ratio of the smallest R(i) to the largest R(i). COLCND (input) double Ratio of the smallest C(i) to the largest C(i). AMAX (input) double Absolute value of largest matrix entry. EQUED (output) char* Specifies the form of equilibration that was done. = 'N': No equilibration = 'R': Row equilibration, i.e., A has been premultiplied by diag(R). = 'C': Column equilibration, i.e., A has been postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A has been replaced by diag(R) * A * diag(C). Internal Parameters =================== THRESH is a threshold value used to decide if row or column scaling should be done based on the ratio of the row or column scaling factors. If ROWCND < THRESH, row scaling is done, and if COLCND < THRESH, column scaling is done. LARGE and SMALL are threshold values used to decide if row scaling should be done based on the absolute size of the largest matrix element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. =====================================================================
int pdPermute_Dense_Matrix | ( | int_t | fst_row, |
int_t | m_loc, | ||
int_t | row_to_proc[], | ||
int_t | perm[], | ||
double | X[], | ||
int | ldx, | ||
double | B[], | ||
int | ldb, | ||
int | nrhs, | ||
gridinfo_t * | grid | ||
) |
Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-th row of B.
int_t pdReDistribute3d_B_to_X | ( | double * | B, |
int_t | m_loc, | ||
int | nrhs, | ||
int_t | ldb, | ||
int_t | fst_row, | ||
int_t * | ilsum, | ||
double * | x, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo3d_t * | grid3d, | ||
dSOLVEstruct_t * | SOLVEstruct | ||
) |
Purpose Re-distribute B on the diagonal processes of the 2D process mesh (only on grid 0). Note This routine can only be called after the routine pxgstrs_init(), in which the structures of the send and receive buffers are set up. Arguments B (input) double* The distributed right-hand side matrix of the possibly equilibrated system. m_loc (input) int (local) The local row dimension of matrix B. nrhs (input) int (global) Number of right-hand sides. ldb (input) int (local) Leading dimension of matrix B. fst_row (input) int (global) The row number of B's first row in the global matrix. ilsum (input) int* (global) Starting position of each supernode in a full array. x (output) double* The solution vector. It is valid only on the diagonal processes. ScalePermstruct (input) dScalePermstruct_t* The data structure to store the scaling and permutation vectors describing the transformations performed to the original matrix A. grid3d (input) gridinfo3d_t* The 3D process mesh. SOLVEstruct (input) dSOLVEstruct_t* Contains the information for the communication during the solution phase. Return value
int_t pdReDistribute3d_X_to_B | ( | int_t | n, |
double * | B, | ||
int_t | m_loc, | ||
int_t | ldb, | ||
int_t | fst_row, | ||
int | nrhs, | ||
double * | x, | ||
int_t * | ilsum, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo3d_t * | grid3d, | ||
dSOLVEstruct_t * | SOLVEstruct | ||
) |
Purpose Re-distribute X on the diagonal processes to B distributed on all the processes (only on grid 0). Note This routine can only be called after the routine pxgstrs_init(), in which the structures of the send and receive buffers are set up.
int_t pdReDistribute_B_to_X | ( | double * | B, |
int_t | m_loc, | ||
int | nrhs, | ||
int_t | ldb, | ||
int_t | fst_row, | ||
int_t * | ilsum, | ||
double * | x, | ||
dScalePermstruct_t * | ScalePermstruct, | ||
Glu_persist_t * | Glu_persist, | ||
gridinfo_t * | grid, | ||
dSOLVEstruct_t * | SOLVEstruct | ||
) |
Purpose ======= Re-distribute B on the diagonal processes of the 2D process mesh. Note ==== This routine can only be called after the routine pdgstrs_init(), in which the structures of the send and receive buffers are set up. Arguments ========= B (input) double* The distributed right-hand side matrix of the possibly equilibrated system. m_loc (input) int (local) The local row dimension of matrix B. nrhs (input) int (global) Number of right-hand sides. ldb (input) int (local) Leading dimension of matrix B. fst_row (input) int (global) The row number of B's first row in the global matrix. ilsum (input) int* (global) Starting position of each supernode in a full array. x (output) double* The solution vector. It is valid only on the diagonal processes. ScalePermstruct (input) dScalePermstruct_t* The data structure to store the scaling and permutation vectors describing the transformations performed to the original matrix A. grid (input) gridinfo_t* The 2D process mesh. SOLVEstruct (input) dSOLVEstruct_t* Contains the information for the communication during the solution phase. Return value ============
void Printdouble5 | ( | char * | name, |
int_t | len, | ||
double * | x | ||
) |
void pxgstrs_finalize | ( | pxgstrs_comm_t * | gstrs_comm | ) |
int_t scuStatUpdate | ( | int_t | knsupc, |
HyP_t * | HyP, | ||
SCT_t * | SCT, | ||
SuperLUStat_t * | stat | ||
) |
int sp_dgemm_dist | ( | char * | transa, |
int | n, | ||
double | alpha, | ||
SuperMatrix * | A, | ||
double * | b, | ||
int | ldb, | ||
double | beta, | ||
double * | c, | ||
int | ldc | ||
) |
Purpose ======= sp_dgemm_dist performs one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C, where op( X ) is one of op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. Parameters ========== TRANSA - (input) char* On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = 'N' or 'n', op( A ) = A. TRANSA = 'T' or 't', op( A ) = A'. TRANSA = 'C' or 'c', op( A ) = conjg( A' ). Unchanged on exit. TRANSB - (input) char* On entry, TRANSB specifies the form of op( B ) to be used in the matrix multiplication as follows: TRANSB = 'N' or 'n', op( B ) = B. TRANSB = 'T' or 't', op( B ) = B'. TRANSB = 'C' or 'c', op( B ) = conjg( B' ). Unchanged on exit. M - (input) int On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. Unchanged on exit. N - (input) int On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. Unchanged on exit. K - (input) int On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. Unchanged on exit. ALPHA - (input) double On entry, ALPHA specifies the scalar alpha. A - (input) SuperMatrix* Matrix A with a sparse format, of dimension (A->nrow, A->ncol). Currently, the type of A can be: Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. In the future, more general A can be handled. B - double array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. Unchanged on exit. 
LDB - (input) int On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, n ). Unchanged on exit. BETA - (input) double On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. C - double array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*B + beta*C ). LDC - (input) int On entry, LDC specifies the first dimension of C as declared in the calling (sub)program. LDC must be at least max(1,m). Unchanged on exit. ==== Sparse Level 3 Blas routine.
int sp_dgemv_dist | ( | char * | trans, |
double | alpha, | ||
SuperMatrix * | A, | ||
double * | x, | ||
int | incx, | ||
double | beta, | ||
double * | y, | ||
int | incy | ||
) |
SpGEMV.
Purpose ======= sp_dgemv_dist() performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is a sparse A->nrow by A->ncol matrix. Parameters ========== TRANS - (input) char* On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' y := alpha*A*x + beta*y. TRANS = 'T' or 't' y := alpha*A'*x + beta*y. TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. ALPHA - (input) double On entry, ALPHA specifies the scalar alpha. A - (input) SuperMatrix* Matrix A with a sparse format, of dimension (A->nrow, A->ncol). Currently, the type of A can be: Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. In the future, more general A can be handled. X - (input) double*, array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, the incremented array X must contain the vector x. INCX - (input) int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. BETA - (input) double On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Y - (output) double*, array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry with BETA non-zero, the incremented array Y must contain the vector y. On exit, Y is overwritten by the updated vector y. INCY - (input) int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. ==== Sparse Level 2 Blas routine.
int sp_dtrsv_dist | ( | char * | uplo, |
char * | trans, | ||
char * | diag, | ||
SuperMatrix * | L, | ||
SuperMatrix * | U, | ||
double * | x, | ||
int * | info | ||
) |
Purpose ======= sp_dtrsv_dist() solves one of the systems of equations A*x = b, or A'*x = b, where b and x are n element vectors and A is a sparse unit, or non-unit, upper or lower triangular matrix. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine. Parameters ========== uplo - (input) char* On entry, uplo specifies whether the matrix is an upper or lower triangular matrix as follows: uplo = 'U' or 'u' A is an upper triangular matrix. uplo = 'L' or 'l' A is a lower triangular matrix. trans - (input) char* On entry, trans specifies the equations to be solved as follows: trans = 'N' or 'n' A*x = b. trans = 'T' or 't' A'*x = b. trans = 'C' or 'c' A'*x = b. diag - (input) char* On entry, diag specifies whether or not A is unit triangular as follows: diag = 'U' or 'u' A is assumed to be unit triangular. diag = 'N' or 'n' A is not assumed to be unit triangular. L - (input) SuperMatrix* The factor L from the factorization Pr*A*Pc=L*U. Use compressed row subscripts storage for supernodes, i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. U - (input) SuperMatrix* The factor U from the factorization Pr*A*Pc=L*U. U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU. x - (input/output) double* Before entry, the incremented array X must contain the n element right-hand side vector b. On exit, X is overwritten with the solution vector x. info - (output) int* If *info = -i, the i-th argument had an illegal value.
int superlu_daxpy | ( | const int | n, |
const double | alpha, | ||
const double * | x, | ||
const int | incx, | ||
double * | y, | ||
const int | incy | ||
) |
int superlu_dgemm | ( | const char * | transa, |
const char * | transb, | ||
int | m, | ||
int | n, | ||
int | k, | ||
double | alpha, | ||
double * | a, | ||
int | lda, | ||
double * | b, | ||
int | ldb, | ||
double | beta, | ||
double * | c, | ||
int | ldc | ||
) |
int superlu_dgemv | ( | const char * | trans, |
const int | m, | ||
const int | n, | ||
const double | alpha, | ||
const double * | a, | ||
const int | lda, | ||
const double * | x, | ||
const int | incx, | ||
const double | beta, | ||
double * | y, | ||
const int | incy | ||
) |
int superlu_dger | ( | const int | m, |
const int | n, | ||
const double | alpha, | ||
const double * | x, | ||
const int | incx, | ||
const double * | y, | ||
const int | incy, | ||
double * | a, | ||
const int | lda | ||
) |
int superlu_dscal | ( | const int | n, |
const double | alpha, | ||
double * | x, | ||
const int | incx | ||
) |
int superlu_dtrsm | ( | const char * | sideRL, |
const char * | uplo, | ||
const char * | transa, | ||
const char * | diag, | ||
const int | m, | ||
const int | n, | ||
const double | alpha, | ||
const double * | a, | ||
const int | lda, | ||
double * | b, | ||
const int | ldb | ||
) |
int superlu_dtrsv | ( | char * | uplo, |
char * | trans, | ||
char * | diag, | ||
int | n, | ||
double * | a, | ||
int | lda, | ||
double * | x, | ||
int | incx | ||
) |
int updateDirtyBit | ( | int_t | k0, |
HyP_t * | HyP, | ||
gridinfo_t * | grid | ||
) |
void validateInput_pdgssvx3d | ( | superlu_dist_options_t * | options, |
SuperMatrix * | A, | ||
int | ldb, | ||
int | nrhs, | ||
gridinfo3d_t * | grid3d, | ||
int * | info | ||
) |
Validates the input parameters for a given problem.
This function checks the input parameters for a given problem and sets the error code in the 'info' variable accordingly. If there is an error, it prints an error message and returns.
[in] | options | Pointer to the options structure containing Fact, RowPerm, ColPerm, and IterRefine values. |
[in] | A | Pointer to the matrix A structure containing nrow, ncol, Stype, Dtype, and Mtype values. |
[in] | ldb | The leading dimension of the array B. |
[in] | nrhs | The number of right-hand sides. |
[in] | grid3d | Pointer to the 3D grid structure. |
[out] | info | Pointer to an integer variable that stores the error code. |
|
extern |
|
extern |