SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
superlu_defs.h File Reference

Definitions which are precision-neutral. More...

#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <stdint.h>
#include "superlu_dist_config.h"
#include "superlu_FortranCInterface.h"
#include "superlu_FCnames.h"
#include "superlu_enum_consts.h"
#include "supermatrix.h"
#include "util_dist.h"
#include "psymbfact.h"
Include dependency graph for superlu_defs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  superlu_scope_t
 
struct  gridinfo_t
 
struct  gridinfo3d_t
 
struct  Glu_persist_t
 
struct  Glu_freeable_t
 
struct  pxgstrs_comm_t
 
struct  superlu_dist_options_t
 
struct  superlu_dist_mem_usage_t
 
struct  Ucb_indptr_t
 
struct  Ublock_info_t
 
struct  Remain_info_t
 
struct  etree_node
 
struct  superlu_pair
 
struct  uPanelInfo_t
 
struct  lPanelInfo_t
 
struct  packLUInfo_t
 
struct  HyP_t
 
struct  local_l_blk_info_t
 
struct  local_u_blk_info_t
 
struct  perm_array_t
 
struct  factStat_t
 
struct  d2Hreduce_t
 
struct  treeList_t
 
struct  treeTopoInfo_t
 
struct  gEtreeInfo_t
 
struct  sForest_t
 
struct  commRequests_t
 
struct  factNodelists_t
 
struct  msgs_t
 
struct  xtrsTimer_t
 
struct  C_Tree
 

Macros

#define SUPERLU_DIST_MAJOR_VERSION   8
 
#define SUPERLU_DIST_MINOR_VERSION   2
 
#define SUPERLU_DIST_PATCH_VERSION   1
 
#define SUPERLU_DIST_RELEASE_DATE   "November 17, 2023"
 
#define mpi_int_t   MPI_LONG_LONG_INT
 
#define IFMT   "%lld"
 
#define SuperLU_MPI_COMPLEX   MPI_C_COMPLEX
 
#define SuperLU_MPI_DOUBLE_COMPLEX   MPI_C_DOUBLE_COMPLEX
 
#define MAX_SUPER_SIZE   512 /* Sherry: moved from superlu_gpu.cu */
 
#define ISORT   /* NOTE: qsort() has bug on Mac */
 
#define BC_HEADER   2
 
#define LB_DESCRIPTOR   2
 
#define BR_HEADER   3
 
#define UB_DESCRIPTOR   2
 
#define BC_HEADER_NEWU   3
 
#define UB_DESCRIPTOR_NEWU   2
 
#define NBUFFERS   5
 
#define NTAGS   INT_MAX
 
#define UjROW   10
 
#define UkSUB   11
 
#define UkVAL   12
 
#define LkSUB   13
 
#define LkVAL   14
 
#define LkkDIAG   15
 
#define XK_H   2 /* The header preceding each X block. */
 
#define LSUM_H   2 /* The header preceding each MOD block. */
 
#define GSUM   20
 
#define Xk   21
 
#define Yk   22
 
#define LSUM   23
 
#define COMM_ALL   100
 
#define COMM_COLUMN   101
 
#define COMM_ROW   102
 
#define SUPER_LINEAR   11
 
#define SUPER_BLOCK   12
 
#define NO_MARKER   3
 
#define IAM(comm)   { int rank; MPI_Comm_rank ( comm, &rank ); rank};
 
#define MYROW(iam, grid)   ( (iam) / grid->npcol )
 
#define MYCOL(iam, grid)   ( (iam) % grid->npcol )
 
#define BlockNum(i)   ( supno[i] )
 
#define FstBlockC(bnum)   ( xsup[bnum] )
 
#define SuperSize(bnum)   ( xsup[bnum+1]-xsup[bnum] )
 
#define LBi(bnum, grid)   ( (bnum)/grid->nprow )/* Global to local block rowwise */
 
#define LBj(bnum, grid)   ( (bnum)/grid->npcol )/* Global to local block columnwise*/
 
#define PROW(bnum, grid)   ( (bnum) % grid->nprow )
 
#define PCOL(bnum, grid)   ( (bnum) % grid->npcol )
 
#define PNUM(i, j, grid)   ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */
 
#define CEILING(a, b)   ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
 
#define RHS_ITERATE(i)    for (i = 0; i < nrhs; ++i)
 
#define X_BLK(i)    ilsum[i] * nrhs + (i+1) * XK_H
 
#define LSUM_BLK(i)    ilsum[i] * nrhs + (i+1) * LSUM_H
 
#define SuperLU_timer_   SuperLU_timer_dist_
 
#define LOG2(x)   (log10((double) x) / log10(2.0))
 
#define VT_TRACEON
 
#define VT_TRACEOFF
 
#define SUPERLU_DIST_EXPORT
 
#define MAGMA_CONST
 
#define DIM_X   16
 
#define DIM_Y   16
 
#define BLK_M   DIM_X*4
 
#define BLK_N   DIM_Y*4
 
#define BLK_K   2048/(BLK_M)
 
#define DIM_XA   DIM_X
 
#define DIM_YA   DIM_Y
 
#define DIM_XB   DIM_X
 
#define DIM_YB   DIM_Y
 
#define NWARP   DIM_X*DIM_Y/32
 
#define THR_M   ( BLK_M / DIM_X )
 
#define THR_N   ( BLK_N / DIM_Y )
 
#define fetch(A, m, n, bound)   offs_d##A[min(n*LD##A+m, bound)]
 
#define fma(A, B, C)   C += (A*B)
 
#define cmax(a, b)   ((a) > (b) ? (a) : (b))
 
#define SLU_MPI_TAG(id, num)   ( (6*(num)+id) % tag_ub )
 
#define __SUPERLU_ASYNC_TREE
 
#define DEG_TREE   2
 

Typedefs

typedef int64_t int_t
 
typedef enum treePartStrat treePartStrat
 
typedef struct xtrsTimer_t xtrsTimer_t
 

Enumerations

enum  treePartStrat { ND , GD }
 

Functions

void superlu_gridinit (MPI_Comm, int, int, gridinfo_t *)
 All processes in the MPI communicator must call this routine. More...
 
void superlu_gridmap (MPI_Comm, int, int, int[], int, gridinfo_t *)
 All processes in the MPI communicator must call this routine. More...
 
void superlu_gridexit (gridinfo_t *)
 
void superlu_gridinit3d (MPI_Comm Bcomm, int nprow, int npcol, int npdep, gridinfo3d_t *grid)
 All processes in the MPI communicator must call this routine. More...
 
void superlu_gridmap3d (MPI_Comm, int, int, int, int[], gridinfo3d_t *)
 All processes in the MPI communicator must call this routine. On output, if a process is not in the SuperLU group, the following values are assigned to it: grid->comm = MPI_COMM_NULL grid->iam = -1. More...
 
void superlu_gridexit3d (gridinfo3d_t *grid)
 
void set_default_options_dist (superlu_dist_options_t *)
 Set the default values for the options argument. More...
 
void print_options_dist (superlu_dist_options_t *)
 Print the options setting. More...
 
void print_sp_ienv_dist (superlu_dist_options_t *)
 Print the blocking parameters. More...
 
void Destroy_CompCol_Matrix_dist (SuperMatrix *)
 
void Destroy_SuperNode_Matrix_dist (SuperMatrix *)
 
void Destroy_SuperMatrix_Store_dist (SuperMatrix *)
 Deallocate the structure pointing to the actual storage of the matrix. More...
 
void Destroy_CompCol_Permuted_dist (SuperMatrix *)
 A is of type Stype==NCP. More...
 
void Destroy_CompRowLoc_Matrix_dist (SuperMatrix *)
 
void Destroy_CompRow_Matrix_dist (SuperMatrix *)
 
void sp_colorder (superlu_dist_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *)
 
int sp_symetree_dist (int_t *, int_t *, int_t *, int_t, int_t *)
 Symmetric elimination tree. More...
 
int sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *)
 Nonsymmetric elimination tree. More...
 
void get_perm_c_dist (int_t, int_t, SuperMatrix *, int_t *)
 
void at_plus_a_dist (const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **)
 
int genmmd_dist_ (int_t *, int_t *, int_t *a, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *)
 
void bcast_tree (void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *)
 
int_t symbfact (superlu_dist_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
 
int_t symbfact_SubInit (superlu_dist_options_t *options, fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *)
 
int_t symbfact_SubXpand (int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *)
 
int_t symbfact_SubFree (Glu_freeable_t *)
 
void countnz_dist (const int_t, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
 
int64_t fixupL_dist (const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *)
 
int_t * TreePostorder_dist (int_t, int_t *)
 
float smach_dist (char *)
 
double dmach_dist (char *)
 
void * superlu_malloc_dist (size_t)
 
void superlu_free_dist (void *)
 
int * int32Malloc_dist (int)
 
int * int32Calloc_dist (int)
 
int_t * intMalloc_dist (int_t)
 
int_t * intCalloc_dist (int_t)
 
int mc64id_dist (int *)
 
void arrive_at_ublock (int_t, int_t *, int_t *, int_t *, int_t *, int_t *, int_t, int_t, int_t *, int_t *, int_t *, gridinfo_t *)
 
int_t estimate_bigu_size (int_t, int_t **, Glu_persist_t *, gridinfo_t *, int_t *, int_t *)
 
void superlu_abort_and_exit_dist (char *)
 
int sp_ienv_dist (int, superlu_dist_options_t *)
 
void ifill_dist (int_t *, int_t, int_t)
 Fills an integer array with a given value. More...
 
void super_stats_dist (int_t, int_t *)
 
void get_diag_procs (int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **)
 
int_t QuerySpace_dist (int_t, int_t, Glu_freeable_t *, superlu_dist_mem_usage_t *)
 
int xerr_dist (char *, int *)
 
void pxerr_dist (char *, gridinfo_t *, int_t)
 
void PStatInit (SuperLUStat_t *)
 
void PStatClear (SuperLUStat_t *)
 
void PStatFree (SuperLUStat_t *)
 
void PStatPrint (superlu_dist_options_t *, SuperLUStat_t *, gridinfo_t *)
 
void log_memory (int64_t, SuperLUStat_t *)
 
void print_memorylog (SuperLUStat_t *, char *)
 
int superlu_dist_GetVersionNumber (int *, int *, int *)
 
void quickSort (int_t *, int_t, int_t, int_t)
 
void quickSortM (int_t *, int_t, int_t, int_t, int_t, int_t)
 
int_t partition (int_t *, int_t, int_t, int_t)
 
int_t partitionM (int_t *, int_t, int_t, int_t, int_t, int_t)
 
float symbfact_dist (superlu_dist_options_t *, int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, superlu_dist_mem_usage_t *)
 
float get_perm_c_parmetis (SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *)
 
int_t psymbfact_LUXpandMem (int, int_t, int_t, int_t, int_t, int, int, int, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
 
int_t psymbfact_LUXpand (int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
 
int_t psymbfact_LUXpand_RL (int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
 
int_t psymbfact_prLUXpand (int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *)
 
void isort (int_t N, int_t *ARRAY1, int_t *ARRAY2)
 
void isort1 (int_t N, int_t *ARRAY)
 
double estimate_cpu_time (int m, int n, int k)
 
int get_thread_per_process (void)
 
int_t get_max_buffer_size (void)
 
int_t get_min (int_t *, int_t)
 
int compare_pair (const void *, const void *)
 
int_t static_partition (struct superlu_pair *, int_t, int_t *, int_t, int_t *, int_t *, int)
 
int get_acc_offload (void)
 
void print_panel_seg_dist (int_t, int_t, int_t, int_t, int_t *, int_t *)
 Diagnostic print of segment info after panel_dfs(). More...
 
void check_repfnz_dist (int_t, int_t, int_t, int_t *)
 Check whether repfnz[] == EMPTY after reset. More...
 
int_t CheckZeroDiagonal (int_t, int_t *, int_t *, int_t *)
 
int check_perm_dist (char *what, int_t n, int_t *perm)
 
void PrintDouble5 (char *, int_t, double *)
 
void PrintInt10 (char *, int_t, int_t *)
 
void PrintInt32 (char *, int, int *)
 
int file_PrintInt10 (FILE *, char *, int_t, int_t *)
 
int file_PrintInt32 (FILE *, char *, int, int *)
 
int file_PrintLong10 (FILE *, char *, int_t, int_t *)
 
void C_RdTree_Create (C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision)
 
void C_RdTree_Nullify (C_Tree *tree)
 
yes_no_t C_RdTree_IsRoot (C_Tree *tree)
 
void C_RdTree_forwardMessageSimple (C_Tree *Tree, void *localBuffer, int msgSize)
 
void C_RdTree_waitSendRequest (C_Tree *Tree)
 
void C_BcTree_Create (C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision)
 
void C_BcTree_Nullify (C_Tree *tree)
 
yes_no_t C_BcTree_IsRoot (C_Tree *tree)
 
void C_BcTree_forwardMessageSimple (C_Tree *tree, void *localBuffer, int msgSize)
 
void C_BcTree_waitSendRequest (C_Tree *tree)
 
void DistPrint (char *function_name, double value, char *Units, gridinfo_t *grid)
 
void DistPrint3D (char *function_name, double value, char *Units, gridinfo3d_t *grid3d)
 
void treeImbalance3D (gridinfo3d_t *grid3d, SCT_t *SCT)
 
void SCT_printComm3D (gridinfo3d_t *grid3d, SCT_t *SCT)
 
int_t * getPerm_c_supno (int_t nsupers, superlu_dist_options_t *, int_t *etree, Glu_persist_t *Glu_persist, int_t **Lrowind_bc_ptr, int_t **Ufstnz_br_ptr, gridinfo_t *)
 
void SCT_init (SCT_t *)
 
void SCT_print (gridinfo_t *grid, SCT_t *SCT)
 
void SCT_print3D (gridinfo3d_t *grid3d, SCT_t *SCT)
 
void SCT_free (SCT_t *)
 
treeList_t * setree2list (int_t nsuper, int_t *setree)
 
int free_treelist (int_t nsuper, treeList_t *treeList)
 
int_t calcTreeWeight (int_t nsupers, int_t *setree, treeList_t *treeList, int_t *xsup)
 
int_t getDescendList (int_t k, int_t *dlist, treeList_t *treeList)
 
int_t getCommonAncestorList (int_t k, int_t *alist, int_t *seTree, treeList_t *treeList)
 
int_t getCommonAncsCount (int_t k, treeList_t *treeList)
 
int_t * getPermNodeList (int_t nnode, int_t *nlist, int_t *perm_c_sup, int_t *iperm_c_sup)
 
int_t * getEtreeLB (int_t nnodes, int_t *perm_l, int_t *gTopOrder)
 
int_t * getSubTreeRoots (int_t k, treeList_t *treeList)
 
int_t * merg_perms (int_t nperms, int_t *nnodes, int_t **perms)
 
int_t * getGlobal_iperm (int_t nsupers, int_t nperms, int_t **perms, int_t *nnodes)
 
int_t log2i (int_t index)
 
int_t * supernodal_etree (int_t nsuper, int_t *etree, int_t *supno, int_t *xsup)
 
int_t testSubtreeNodelist (int_t nsupers, int_t numList, int_t **nodeList, int_t *nodeCount)
 
int_t testListPerm (int_t nodeCount, int_t *nodeList, int_t *permList, int_t *gTopLevel)
 
int_t * topological_ordering (int_t nsuper, int_t *setree)
 
int_t * Etree_LevelBoundry (int_t *perm, int_t *tsort_etree, int_t nsuper)
 
int_t * calculate_num_children (int_t nsuper, int_t *setree)
 
void Print_EtreeLevelBoundry (int_t *Etree_LvlBdry, int_t max_level, int_t nsuper)
 
void print_etree_leveled (int_t *setree, int_t *tsort_etree, int_t nsuper)
 
void print_etree (int_t *setree, int_t *iperm, int_t nsuper)
 
int_t printFileList (char *sname, int_t nnodes, int_t *dlist, int_t *setree)
 
int * getLastDepBtree (int_t nsupers, treeList_t *treeList)
 
int_t * getReplicatedTrees (gridinfo3d_t *grid3d)
 
int_t * getGridTrees (gridinfo3d_t *grid3d)
 
int_t ** getNodeList (int_t maxLvl, int_t *setree, int_t *nnodes, int_t *treeHeads, treeList_t *treeList)
 
int_t * calcNumNodes (int_t maxLvl, int_t *treeHeads, treeList_t *treeList)
 
int_t * getTreeHeads (int_t maxLvl, int_t nsupers, treeList_t *treeList)
 
int_t * getMyIperm (int_t nnodes, int_t nsupers, int_t *myPerm)
 
int_t * getMyTopOrder (int_t nnodes, int_t *myPerm, int_t *myIperm, int_t *setree)
 
int_t * getMyEtLims (int_t nnodes, int_t *myTopOrder)
 
treeTopoInfo_t getMyTreeTopoInfo (int_t nnodes, int_t nsupers, int_t *myPerm, int_t *setree)
 
sForest_t ** getNestDissForests (int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
 
int_t ** getTreePermForest (int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t *sForests, int_t *perm_c_supno, int_t *iperm_c_supno, gridinfo3d_t *grid3d)
 
int_t ** getTreePermFr (int_t *myTreeIdxs, sForest_t **sForests, gridinfo3d_t *grid3d)
 
int_t * getMyNodeCountsFr (int_t maxLvl, int_t *myTreeIdxs, sForest_t **sForests)
 
int_t ** getNodeListFr (int_t maxLvl, sForest_t **sForests)
 
int_t * getNodeCountsFr (int_t maxLvl, sForest_t **sForests)
 
int * getIsNodeInMyGrid (int_t nsupers, int_t maxLvl, int_t *myNodeCount, int_t **treePerm)
 
void printForestWeightCost (sForest_t **sForests, SCT_t *SCT, gridinfo3d_t *grid3d)
 
sForest_t ** getGreedyLoadBalForests (int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
 
sForest_t ** getForests (int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
 
int_t getBigUSize (superlu_dist_options_t *, int_t nsupers, gridinfo_t *grid, int_t **Lrowind_bc_ptr)
 
void getSCUweight (int_t nsupers, treeList_t *treeList, int_t *xsup, int_t **Lrowind_bc_ptr, int_t **Ufstnz_br_ptr, gridinfo3d_t *grid3d)
 
int Wait_LUDiagSend (int_t k, MPI_Request *U_diag_blk_send_req, MPI_Request *L_diag_blk_send_req, gridinfo_t *grid, SCT_t *SCT)
 
int getNsupers (int n, Glu_persist_t *Glu_persist)
 
int set_tag_ub (void)
 
int getNumThreads (int)
 
int_t num_full_cols_U (int_t kk, int_t **Ufstnz_br_ptr, int_t *xsup, gridinfo_t *, int_t *, int_t *)
 
int_t * getFactPerm (int_t)
 
int_t * getFactIperm (int_t *, int_t)
 
int_t initCommRequests (commRequests_t *comReqs, gridinfo_t *grid)
 
int_t initFactStat (int_t nsupers, factStat_t *factStat)
 
int freeFactStat (factStat_t *factStat)
 
int_t initFactNodelists (int_t, int_t, int_t, factNodelists_t *)
 
int freeFactNodelists (factNodelists_t *fNlists)
 
int_t initMsgs (msgs_t *msgs)
 
int_t getNumLookAhead (superlu_dist_options_t *)
 
commRequests_t ** initCommRequestsArr (int_t mxLeafNode, int_t ldt, gridinfo_t *grid)
 
int freeCommRequestsArr (int_t mxLeafNode, commRequests_t **comReqss)
 
msgs_t ** initMsgsArr (int_t numLA)
 
int freeMsgsArr (int_t numLA, msgs_t **msgss)
 
int_t Trs2_InitUblock_info (int_t klst, int_t nb, Ublock_info_t *, int_t *usub, Glu_persist_t *, SuperLUStat_t *)
 
int Cmpfunc_R_info (const void *a, const void *b)
 
int Cmpfunc_U_info (const void *a, const void *b)
 
int sort_R_info (Remain_info_t *Remain_info, int n)
 
int sort_U_info (Ublock_info_t *Ublock_info, int n)
 
int sort_R_info_elm (Remain_info_t *Remain_info, int n)
 
int sort_U_info_elm (Ublock_info_t *Ublock_info, int n)
 
void printTRStimer (xtrsTimer_t *xtrsTimer, gridinfo3d_t *grid3d)
 
void initTRStimer (xtrsTimer_t *xtrsTimer, gridinfo_t *grid)
 
int_t ** getTreePerm (int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, int_t *perm_c_supno, int_t *iperm_c_supno, gridinfo3d_t *grid3d)
 
int_t * getMyNodeCounts (int_t maxLvl, int_t *myTreeIdxs, int_t *gNodeCount)
 
int_t checkIntVector3d (int_t *vec, int_t len, gridinfo3d_t *grid3d)
 
int_t reduceStat (PhaseType PHASE, SuperLUStat_t *stat, gridinfo3d_t *grid3d)
 
int_t Wait_LSend (int_t k, gridinfo_t *grid, int **ToSendR, MPI_Request *s, SCT_t *)
 
int_t Wait_USend (MPI_Request *, gridinfo_t *, SCT_t *)
 
int_t Check_LRecv (MPI_Request *, int *msgcnt)
 
int_t Wait_UDiagBlockSend (MPI_Request *, gridinfo_t *, SCT_t *)
 
int_t Wait_LDiagBlockSend (MPI_Request *, gridinfo_t *, SCT_t *)
 
int_t Wait_UDiagBlock_Recv (MPI_Request *, SCT_t *)
 
int_t Test_UDiagBlock_Recv (MPI_Request *, SCT_t *)
 
int_t Wait_LDiagBlock_Recv (MPI_Request *, SCT_t *)
 
int_t Test_LDiagBlock_Recv (MPI_Request *, SCT_t *)
 
int_t LDiagBlockRecvWait (int_t k, int_t *factored_U, MPI_Request *, gridinfo_t *)
 

Variables

static const int BC_L =1
 
static const int RD_L =2
 
static const int BC_U =3
 
static const int RD_U =4
 

Detailed Description

Definitions which are precision-neutral.

Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt at the top-level directory.

-- Distributed SuperLU routine (version 8.1.2) --
Lawrence Berkeley National Lab, Univ. of California Berkeley.
November 1, 2007

Modified:
    February 20, 2008
    October 11, 2014
    September 18, 2018  version 6.0
    February 8, 2019    version 6.1.1
    November 12, 2019   version 6.2.0
    October 23, 2020    version 6.4.0
    May 12, 2021        version 7.0.0
    October 5, 2021     version 7.1.0
    October 18, 2021    version 7.1.1
    December 12, 2021   version 7.2.0
    May 22, 2022        version 8.0.0
    July 5, 2022        version 8.1.0
    October 1, 2022     version 8.1.1
    November 12, 2022   version 8.1.2
    November 17, 2023   version 8.2.1

Macro Definition Documentation

◆ __SUPERLU_ASYNC_TREE

#define __SUPERLU_ASYNC_TREE

◆ BC_HEADER

#define BC_HEADER   2

◆ BC_HEADER_NEWU

#define BC_HEADER_NEWU   3

◆ BLK_K

#define BLK_K   2048/(BLK_M)

◆ BLK_M

#define BLK_M   DIM_X*4

◆ BLK_N

#define BLK_N   DIM_Y*4

◆ BlockNum

#define BlockNum (   i)    ( supno[i] )

◆ BR_HEADER

#define BR_HEADER   3

◆ CEILING

#define CEILING (   a,
  b 
)    ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )

◆ cmax

#define cmax (   a,
  b 
)    ((a) > (b) ? (a) : (b))

◆ COMM_ALL

#define COMM_ALL   100

◆ COMM_COLUMN

#define COMM_COLUMN   101

◆ COMM_ROW

#define COMM_ROW   102

◆ DEG_TREE

#define DEG_TREE   2

◆ DIM_X

#define DIM_X   16

◆ DIM_XA

#define DIM_XA   DIM_X

◆ DIM_XB

#define DIM_XB   DIM_X

◆ DIM_Y

#define DIM_Y   16

◆ DIM_YA

#define DIM_YA   DIM_Y

◆ DIM_YB

#define DIM_YB   DIM_Y

◆ fetch

#define fetch (   A,
  m,
  n,
  bound 
)    offs_d##A[min(n*LD##A+m, bound)]

◆ fma

#define fma (   A,
  B,
  C 
)    C += (A*B)

◆ FstBlockC

#define FstBlockC (   bnum)    ( xsup[bnum] )

◆ GSUM

#define GSUM   20

◆ IAM

#define IAM (   comm)    { int rank; MPI_Comm_rank ( comm, &rank ); rank};

◆ IFMT

#define IFMT   "%lld"

◆ ISORT

#define ISORT   /* NOTE: qsort() has bug on Mac */

◆ LB_DESCRIPTOR

#define LB_DESCRIPTOR   2

◆ LBi

#define LBi (   bnum,
  grid 
)    ( (bnum)/grid->nprow )/* Global to local block rowwise */

◆ LBj

#define LBj (   bnum,
  grid 
)    ( (bnum)/grid->npcol )/* Global to local block columnwise*/

◆ LkkDIAG

#define LkkDIAG   15

◆ LkSUB

#define LkSUB   13

◆ LkVAL

#define LkVAL   14

◆ LOG2

#define LOG2 (   x)    (log10((double) x) / log10(2.0))

◆ LSUM

#define LSUM   23

◆ LSUM_BLK

#define LSUM_BLK (   i)     ilsum[i] * nrhs + (i+1) * LSUM_H

◆ LSUM_H

#define LSUM_H   2 /* The header preceding each MOD block. */

◆ MAGMA_CONST

#define MAGMA_CONST

◆ MAX_SUPER_SIZE

#define MAX_SUPER_SIZE   512 /* Sherry: moved from superlu_gpu.cu */

◆ mpi_int_t

#define mpi_int_t   MPI_LONG_LONG_INT

◆ MYCOL

#define MYCOL (   iam,
  grid 
)    ( (iam) % grid->npcol )

◆ MYROW

#define MYROW (   iam,
  grid 
)    ( (iam) / grid->npcol )

◆ NBUFFERS

#define NBUFFERS   5

◆ NO_MARKER

#define NO_MARKER   3

◆ NTAGS

#define NTAGS   INT_MAX

◆ NWARP

#define NWARP   DIM_X*DIM_Y/32

◆ PCOL

#define PCOL (   bnum,
  grid 
)    ( (bnum) % grid->npcol )

◆ PNUM

#define PNUM (   i,
  j,
  grid 
)    ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */

◆ PROW

#define PROW (   bnum,
  grid 
)    ( (bnum) % grid->nprow )

◆ RHS_ITERATE

#define RHS_ITERATE (   i)     for (i = 0; i < nrhs; ++i)

◆ SLU_MPI_TAG

#define SLU_MPI_TAG (   id,
  num 
)    ( (6*(num)+id) % tag_ub )

◆ SUPER_BLOCK

#define SUPER_BLOCK   12

◆ SUPER_LINEAR

#define SUPER_LINEAR   11

◆ SUPERLU_DIST_EXPORT

#define SUPERLU_DIST_EXPORT

◆ SUPERLU_DIST_MAJOR_VERSION

#define SUPERLU_DIST_MAJOR_VERSION   8

◆ SUPERLU_DIST_MINOR_VERSION

#define SUPERLU_DIST_MINOR_VERSION   2

◆ SUPERLU_DIST_PATCH_VERSION

#define SUPERLU_DIST_PATCH_VERSION   1

◆ SUPERLU_DIST_RELEASE_DATE

#define SUPERLU_DIST_RELEASE_DATE   "November 17, 2023"

◆ SuperLU_MPI_COMPLEX

#define SuperLU_MPI_COMPLEX   MPI_C_COMPLEX

◆ SuperLU_MPI_DOUBLE_COMPLEX

#define SuperLU_MPI_DOUBLE_COMPLEX   MPI_C_DOUBLE_COMPLEX

◆ SuperLU_timer_

#define SuperLU_timer_   SuperLU_timer_dist_

◆ SuperSize

#define SuperSize (   bnum)    ( xsup[bnum+1]-xsup[bnum] )

◆ THR_M

#define THR_M   ( BLK_M / DIM_X )

◆ THR_N

#define THR_N   ( BLK_N / DIM_Y )

◆ UB_DESCRIPTOR

#define UB_DESCRIPTOR   2

◆ UB_DESCRIPTOR_NEWU

#define UB_DESCRIPTOR_NEWU   2

◆ UjROW

#define UjROW   10

◆ UkSUB

#define UkSUB   11

◆ UkVAL

#define UkVAL   12

◆ VT_TRACEOFF

#define VT_TRACEOFF

◆ VT_TRACEON

#define VT_TRACEON

◆ X_BLK

#define X_BLK (   i)     ilsum[i] * nrhs + (i+1) * XK_H

◆ Xk

#define Xk   21

◆ XK_H

#define XK_H   2 /* The header preceding each X block. */

◆ Yk

#define Yk   22

Typedef Documentation

◆ int_t

typedef int64_t int_t

◆ treePartStrat

typedef enum treePartStrat treePartStrat

◆ xtrsTimer_t

typedef struct xtrsTimer_t xtrsTimer_t

Enumeration Type Documentation

◆ treePartStrat

Enumerator
ND 
GD 

Function Documentation

◆ arrive_at_ublock()

void arrive_at_ublock ( int_t  j,
int_t *  iukp,
int_t *  rukp,
int_t *  jb,
int_t *  ljb,
int_t *  nsupc,
int_t  iukp0,
int_t  rukp0,
int_t *  usub,
int_t *  perm_u,
int_t *  xsup,
gridinfo_t *  grid 
)
Here is the caller graph for this function:

◆ at_plus_a_dist()

void at_plus_a_dist ( const int_t  n,
const int_t  nz,
int_t *  colptr,
int_t *  rowind,
int_t *  bnz,
int_t **  b_colptr,
int_t **  b_rowind 
)
Purpose
=======

Form the structure of A'+A. A is an n-by-n matrix in column oriented
format represented by (colptr, rowind). The output A'+A is in column
oriented format (symmetrically, also row oriented), represented by
(b_colptr, b_rowind).
Here is the caller graph for this function:

◆ bcast_tree()

void bcast_tree ( void *  buf,
int  count,
MPI_Datatype  dtype,
int  root,
int  tag,
gridinfo_t *  grid,
int  scope,
int *  recvcnt 
)
Purpose
=======
  Broadcast an array of *dtype* numbers. The communication pattern
  is a tree with number of branches equal to NBRANCHES.
  The process ranks are between 0 and Np-1.

  The following two pairs of graphs give different ways of viewing the same
  algorithm.  The first pair shows the trees as they should be visualized
  when examining the algorithm.  The second pair are isomorphic graphs of
  the first, which show the actual pattern of data movement.
  Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a
  hypercube broadcast (however, it does not require the number of nodes
  to be a power of two to work).

   TREE BROADCAST, NBRANCHES = 2     *    TREE BROADCAST, NBRANCHES = 3

    root=2
i=4   &______________                *
      |              \               *       root=2
i=2   &______         &______        * i=3     &______________________
      |      \        |      \       *         |          \           \
i=1   &__     &__     &__     &__    * i=1     &______     &______     &__
      |  \    |  \    |  \    |  \   *         |  \   \    |  \   \    |  \
      2   3   4   5   6   7   0   1  *         2   3   4   5   6   7   0   1


         ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS:

               2                                           2
      _________|_________                       ___________|____________
     /         |         \                     /           |      |     \
    6          4          3                   5            0      3      4
   / \         |                             / \           |
  0   7        5                            6   7          1
  |
  1


Arguments
=========

scope

◆ C_BcTree_Create()

void C_BcTree_Create ( C_Tree *  tree,
MPI_Comm  comm,
int *  ranks,
int  rank_cnt,
int  msgSize,
char  precision 
)
Here is the caller graph for this function:

◆ C_BcTree_forwardMessageSimple()

void C_BcTree_forwardMessageSimple ( C_Tree *  tree,
void *  localBuffer,
int  msgSize 
)
Here is the caller graph for this function:

◆ C_BcTree_IsRoot()

yes_no_t C_BcTree_IsRoot ( C_Tree *  tree)
Here is the caller graph for this function:

◆ C_BcTree_Nullify()

void C_BcTree_Nullify ( C_Tree *  tree)
Here is the caller graph for this function:

◆ C_BcTree_waitSendRequest()

void C_BcTree_waitSendRequest ( C_Tree *  tree)
Here is the caller graph for this function:

◆ C_RdTree_Create()

void C_RdTree_Create ( C_Tree *  tree,
MPI_Comm  comm,
int *  ranks,
int  rank_cnt,
int  msgSize,
char  precision 
)
Here is the caller graph for this function:

◆ C_RdTree_forwardMessageSimple()

void C_RdTree_forwardMessageSimple ( C_Tree *  Tree,
void *  localBuffer,
int  msgSize 
)
Here is the caller graph for this function:

◆ C_RdTree_IsRoot()

yes_no_t C_RdTree_IsRoot ( C_Tree *  tree)
Here is the caller graph for this function:

◆ C_RdTree_Nullify()

void C_RdTree_Nullify ( C_Tree *  tree)
Here is the caller graph for this function:

◆ C_RdTree_waitSendRequest()

void C_RdTree_waitSendRequest ( C_Tree *  Tree)
Here is the caller graph for this function:

◆ calcNumNodes()

int_t * calcNumNodes ( int_t  maxLvl,
int_t *  treeHeads,
treeList_t *  treeList 
)

◆ calcTreeWeight()

int_t calcTreeWeight ( int_t  nsupers,
int_t *  setree,
treeList_t *  treeList,
int_t *  xsup 
)
Here is the call graph for this function:

◆ calculate_num_children()

int_t * calculate_num_children ( int_t  nsuper,
int_t *  setree 
)
Here is the call graph for this function:

◆ Check_LRecv()

int_t Check_LRecv ( MPI_Request *  recv_req,
int *  msgcnt 
)

◆ check_perm_dist()

int check_perm_dist ( char *  what,
int_t  n,
int_t *  perm 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_repfnz_dist()

void check_repfnz_dist ( int_t  n,
int_t  w,
int_t  jcol,
int_t *  repfnz 
)

Check whether repfnz[] == EMPTY after reset.

◆ checkIntVector3d()

int_t checkIntVector3d ( int_t *  vec,
int_t  len,
gridinfo3d_t *  grid3d 
)
Here is the call graph for this function:

◆ CheckZeroDiagonal()

int_t CheckZeroDiagonal ( int_t  n,
int_t *  rowind,
int_t *  colbeg,
int_t *  colcnt 
)

◆ Cmpfunc_R_info()

int Cmpfunc_R_info ( const void *  a,
const void *  b 
)
Here is the caller graph for this function:

◆ Cmpfunc_U_info()

int Cmpfunc_U_info ( const void *  a,
const void *  b 
)
Here is the caller graph for this function:

◆ compare_pair()

int compare_pair ( const void *  a,
const void *  b 
)
Here is the caller graph for this function:

◆ countnz_dist()

void countnz_dist ( const int_t  n,
int_t *  xprune,
int_t *  nnzL,
int_t *  nnzU,
Glu_persist_t *  Glu_persist,
Glu_freeable_t *  Glu_freeable 
)
Count the total number of nonzeros in factors L and U,  and in the 
symmetrically reduced L. 

◆ Destroy_CompCol_Matrix_dist()

void Destroy_CompCol_Matrix_dist ( SuperMatrix *  A)

◆ Destroy_CompCol_Permuted_dist()

void Destroy_CompCol_Permuted_dist ( SuperMatrix *  A)

A is of type Stype==NCP.

◆ Destroy_CompRow_Matrix_dist()

void Destroy_CompRow_Matrix_dist ( SuperMatrix *  A)

◆ Destroy_CompRowLoc_Matrix_dist()

void Destroy_CompRowLoc_Matrix_dist ( SuperMatrix *  A)

◆ Destroy_SuperMatrix_Store_dist()

void Destroy_SuperMatrix_Store_dist ( SuperMatrix *  A)

Deallocate the structure pointing to the actual storage of the matrix.

◆ Destroy_SuperNode_Matrix_dist()

void Destroy_SuperNode_Matrix_dist ( SuperMatrix *  A)

◆ DistPrint()

void DistPrint ( char *  function_name,
double  value,
char *  Units,
gridinfo_t *  grid 
)
Here is the caller graph for this function:

◆ DistPrint3D()

void DistPrint3D ( char *  function_name,
double  value,
char *  Units,
gridinfo3d_t *  grid3d 
)

◆ dmach_dist()

double dmach_dist ( char *  cmach)
Here is the caller graph for this function:

◆ estimate_bigu_size()

int_t estimate_bigu_size ( int_t  nsupers,
int_t **  Ufstnz_br_ptr,
Glu_persist_t *  Glu_persist,
gridinfo_t *  grid,
int_t *  perm_u,
int_t *  max_ncols 
)
Here is the call graph for this function:

◆ estimate_cpu_time()

double estimate_cpu_time ( int  m,
int  n,
int  k 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ Etree_LevelBoundry()

int_t * Etree_LevelBoundry ( int_t *  perm,
int_t *  tsort_etree,
int_t  nsuper 
)
Here is the call graph for this function:

◆ file_PrintInt10()

int file_PrintInt10 ( FILE *  fp,
char *  name,
int_t  len,
int_t *  x 
)

◆ file_PrintInt32()

int file_PrintInt32 ( FILE *  fp,
char *  name,
int  len,
int *  x 
)

◆ file_PrintLong10()

int file_PrintLong10 ( FILE *  ,
char *  ,
int_t  ,
int_t *  
)

◆ fixupL_dist()

int64_t fixupL_dist ( const int_t  n,
const int_t *  perm_r,
Glu_persist_t *  Glu_persist,
Glu_freeable_t *  Glu_freeable 
)
Fix up the data storage lsub[] for L-subscripts. It removes the subscript
sets for structural pruning, and applies permutation to the remaining
subscripts.

Return value:
  number of entries in lsub[], which includes the size of the pruned graph,
  which is interspersed in the supernodal graph in the lsub[] array.

◆ free_treelist()

int free_treelist ( int_t  nsuper,
treeList_t *  treeList 
)

◆ freeCommRequestsArr()

int freeCommRequestsArr ( int_t  mxLeafNode,
commRequests_t **  comReqss 
)

◆ freeFactNodelists()

int freeFactNodelists ( factNodelists_t *  fNlists)

◆ freeFactStat()

int freeFactStat ( factStat_t *  factStat)

◆ freeMsgsArr()

int freeMsgsArr ( int_t  numLA,
msgs_t **  msgss 
)

◆ genmmd_dist_()

int genmmd_dist_ ( int_t *  neqns,
int_t *  xadj,
int_t *  a,
int_t *  invp,
int_t *  perm,
int_t *  delta,
int_t *  dhead,
int_t *  qsize,
int_t *  llist,
int_t *  marker,
int_t *  maxint,
int_t *  nofsub 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_acc_offload()

int get_acc_offload ( void  )
Here is the caller graph for this function:

◆ get_diag_procs()

void get_diag_procs ( int_t  n,
Glu_persist_t Glu_persist,
gridinfo_t grid,
int_t num_diag_procs,
int_t **  diag_procs,
int_t **  diag_len 
)
Here is the call graph for this function:

◆ get_max_buffer_size()

int_t get_max_buffer_size ( void  )

◆ get_min()

int_t get_min ( int_t sums,
int_t  nprocs 
)
Here is the caller graph for this function:

◆ get_perm_c_dist()

void get_perm_c_dist ( int_t  pnum,
int_t  ispec,
SuperMatrix A,
int_t perm_c 
)
Purpose
=======

GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple
minimum degree ordering code by Joseph Liu to matrix A'*A or A+A',
or using approximate minimum degree column ordering by Davis et al.
The LU factorization of A*Pc tends to have less fill than the LU 
factorization of A.

Arguments
=========

ispec   (input) colperm_t
        Specifies what type of column permutation to use to reduce fill.
        = NATURAL: natural ordering (i.e., Pc = I)
        = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A
        = MMD_ATA: minimum degree ordering on structure of A'*A
        = METIS_AT_PLUS_A: MeTis on A'+A

A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
        of the linear equations is A->nrow. Currently, the type of A 
        can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
        In the future, more general A can be handled.

perm_c  (output) int*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_perm_c_parmetis()

float get_perm_c_parmetis ( SuperMatrix A,
int_t perm_r,
int_t perm_c,
int  nprocs_i,
int  noDomains,
int_t **  sizes,
int_t **  fstVtxSep,
gridinfo_t grid,
MPI_Comm *  metis_comm 
)
Purpose
=======

GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a
graph partitioning algorithm to the symmetrized graph A+A'.  The
multilevel graph partitioning algorithm used is the
ParMETIS_V3_NodeND routine available in the parallel graph
partitioning package parMETIS.  

The number of independent sub-domains noDomains computed by this
algorithm has to be a power of 2.  Hence noDomains is the largest
power of 2 that does not exceed nprocs_i, where nprocs_i = nprow
* npcol is the number of processors used in SuperLU_DIST.

Arguments
=========

A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
        of the linear equations is A->nrow.  Matrix A is distributed
        in NRformat_loc format.

perm_r  (input) int_t*
        Row permutation vector of size A->nrow, which defines the 
        permutation matrix Pr; perm_r[i] = j means row i of A is in 
        position j in Pr*A.

perm_c  (output) int_t*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.

nprocs_i (input) int*
        Number of processors the input matrix is distributed on in a block
        row format.  It corresponds to number of processors used in
        SuperLU_DIST.

noDomains (input) int*, must be power of 2
        Number of independent domains to be computed by the graph
        partitioning algorithm.  ( noDomains <= nprocs_i )

sizes   (output) int_t**, of size 2 * noDomains
        Returns pointer to an array containing the number of nodes
        for each sub-domain and each separator.  Separators are stored 
        from left to right.
        Memory for the array is allocated in this routine.

fstVtxSep (output) int_t**, of size 2 * noDomains
        Returns pointer to an array containing first node for each
        sub-domain and each separator.
        Memory for the array is allocated in this routine.

Return value
============
  < 0, number of bytes allocated on return from the symbolic factorization.
  > 0, number of bytes allocated when out of memory.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_thread_per_process()

int get_thread_per_process ( void  )

◆ getBigUSize()

int_t getBigUSize ( superlu_dist_options_t options,
int_t  nsupers,
gridinfo_t grid,
int_t **  Lrowind_bc_ptr 
)
Here is the call graph for this function:

◆ getCommonAncestorList()

int_t getCommonAncestorList ( int_t  k,
int_t alist,
int_t seTree,
treeList_t treeList 
)
Here is the caller graph for this function:

◆ getCommonAncsCount()

int_t getCommonAncsCount ( int_t  k,
treeList_t treeList 
)

◆ getDescendList()

int_t getDescendList ( int_t  k,
int_t dlist,
treeList_t treeList 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ getEtreeLB()

int_t * getEtreeLB ( int_t  nnodes,
int_t perm_l,
int_t gTopOrder 
)
Here is the call graph for this function:

◆ getFactIperm()

int_t * getFactIperm ( int_t perm,
int_t  nsupers 
)

◆ getFactPerm()

int_t * getFactPerm ( int_t  nsupers)

◆ getForests()

sForest_t ** getForests ( int_t  maxLvl,
int_t  nsupers,
int_t setree,
treeList_t treeList 
)
Here is the call graph for this function:

◆ getGlobal_iperm()

int_t * getGlobal_iperm ( int_t  nsupers,
int_t  nperms,
int_t **  perms,
int_t nnodes 
)

◆ getGreedyLoadBalForests()

sForest_t ** getGreedyLoadBalForests ( int_t  maxLvl,
int_t  nsupers,
int_t setree,
treeList_t treeList 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ getGridTrees()

int_t * getGridTrees ( gridinfo3d_t grid3d)
Here is the call graph for this function:

◆ getIsNodeInMyGrid()

int * getIsNodeInMyGrid ( int_t  nsupers,
int_t  maxLvl,
int_t myNodeCount,
int_t **  treePerm 
)

◆ getLastDepBtree()

int * getLastDepBtree ( int_t  nsupers,
treeList_t treeList 
)

◆ getMyEtLims()

int_t * getMyEtLims ( int_t  nnodes,
int_t myTopOrder 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ getMyIperm()

int_t * getMyIperm ( int_t  nnodes,
int_t  nsupers,
int_t myPerm 
)
Here is the caller graph for this function:

◆ getMyNodeCounts()

int_t * getMyNodeCounts ( int_t  maxLvl,
int_t myTreeIdxs,
int_t gNodeCount 
)

◆ getMyNodeCountsFr()

int_t * getMyNodeCountsFr ( int_t  maxLvl,
int_t myTreeIdxs,
sForest_t **  sForests 
)

◆ getMyTopOrder()

int_t * getMyTopOrder ( int_t  nnodes,
int_t myPerm,
int_t myIperm,
int_t setree 
)
Here is the caller graph for this function:

◆ getMyTreeTopoInfo()

treeTopoInfo_t getMyTreeTopoInfo ( int_t  nnodes,
int_t  nsupers,
int_t myPerm,
int_t setree 
)
Here is the call graph for this function:

◆ getNestDissForests()

sForest_t ** getNestDissForests ( int_t  maxLvl,
int_t  nsupers,
int_t setree,
treeList_t treeList 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ getNodeCountsFr()

int_t * getNodeCountsFr ( int_t  maxLvl,
sForest_t **  sForests 
)

◆ getNodeList()

int_t ** getNodeList ( int_t  maxLvl,
int_t setree,
int_t nnodes,
int_t treeHeads,
treeList_t treeList 
)
Here is the call graph for this function:

◆ getNodeListFr()

int_t ** getNodeListFr ( int_t  maxLvl,
sForest_t **  sForests 
)

◆ getNsupers()

int getNsupers ( int  n,
Glu_persist_t Glu_persist 
)

◆ getNumLookAhead()

int_t getNumLookAhead ( superlu_dist_options_t options)

◆ getNumThreads()

int getNumThreads ( int  iam)

◆ getPerm_c_supno()

int_t * getPerm_c_supno ( int_t  nsupers,
superlu_dist_options_t options,
int_t etree,
Glu_persist_t Glu_persist,
int_t **  Lrowind_bc_ptr,
int_t **  Ufstnz_br_ptr,
gridinfo_t grid 
)
Here is the call graph for this function:

◆ getPermNodeList()

int_t * getPermNodeList ( int_t  nnode,
int_t nlist,
int_t perm_c_sup,
int_t iperm_c_sup 
)
Here is the call graph for this function:

◆ getReplicatedTrees()

int_t * getReplicatedTrees ( gridinfo3d_t grid3d)
Here is the call graph for this function:

◆ getSCUweight()

void getSCUweight ( int_t  nsupers,
treeList_t treeList,
int_t xsup,
int_t **  Lrowind_bc_ptr,
int_t **  Ufstnz_br_ptr,
gridinfo3d_t grid3d 
)
Here is the call graph for this function:

◆ getSubTreeRoots()

int_t * getSubTreeRoots ( int_t  k,
treeList_t treeList 
)
Here is the caller graph for this function:

◆ getTreeHeads()

int_t * getTreeHeads ( int_t  maxLvl,
int_t  nsupers,
treeList_t treeList 
)
Here is the call graph for this function:

◆ getTreePerm()

int_t ** getTreePerm ( int_t myTreeIdxs,
int_t myZeroTrIdxs,
int_t nodeCount,
int_t **  nodeList,
int_t perm_c_supno,
int_t iperm_c_supno,
gridinfo3d_t grid3d 
)
Here is the call graph for this function:

◆ getTreePermForest()

int_t ** getTreePermForest ( int_t myTreeIdxs,
int_t myZeroTrIdxs,
sForest_t sForests,
int_t perm_c_supno,
int_t iperm_c_supno,
gridinfo3d_t grid3d 
)

◆ getTreePermFr()

int_t ** getTreePermFr ( int_t myTreeIdxs,
sForest_t **  sForests,
gridinfo3d_t grid3d 
)
Here is the call graph for this function:

◆ ifill_dist()

void ifill_dist ( int_t a,
int_t  alen,
int_t  ival 
)

Fills an integer array with a given value.

Here is the caller graph for this function:

◆ initCommRequests()

int_t initCommRequests ( commRequests_t comReqs,
gridinfo_t grid 
)
Here is the caller graph for this function:

◆ initCommRequestsArr()

commRequests_t ** initCommRequestsArr ( int_t  mxLeafNode,
int_t  ldt,
gridinfo_t grid 
)
Here is the call graph for this function:

◆ initFactNodelists()

int_t initFactNodelists ( int_t  ldt,
int_t  num_threads,
int_t  nsupers,
factNodelists_t fNlists 
)

◆ initFactStat()

int_t initFactStat ( int_t  nsupers,
factStat_t factStat 
)
Here is the call graph for this function:

◆ initMsgs()

int_t initMsgs ( msgs_t msgs)
Here is the caller graph for this function:

◆ initMsgsArr()

msgs_t ** initMsgsArr ( int_t  numLA)
Here is the call graph for this function:

◆ initTRStimer()

void initTRStimer ( xtrsTimer_t xtrsTimer,
gridinfo_t grid 
)

◆ int32Calloc_dist()

int * int32Calloc_dist ( int  n)
Here is the caller graph for this function:

◆ int32Malloc_dist()

int * int32Malloc_dist ( int  n)
Here is the caller graph for this function:

◆ intCalloc_dist()

int_t * intCalloc_dist ( int_t  n)

◆ intMalloc_dist()

int_t * intMalloc_dist ( int_t  n)

◆ isort()

void isort ( int_t  N,
int_t ARRAY1,
int_t ARRAY2 
)

◆ isort1()

void isort1 ( int_t  N,
int_t ARRAY 
)

◆ LDiagBlockRecvWait()

int_t LDiagBlockRecvWait ( int_t  k,
int_t factored_U,
MPI_Request *  L_diag_blk_recv_req,
gridinfo_t grid 
)

◆ log2i()

int_t log2i ( int_t  index)
Here is the caller graph for this function:

◆ log_memory()

void log_memory ( int64_t  cur_bytes,
SuperLUStat_t stat 
)

◆ mc64id_dist()

int mc64id_dist ( int *  icntl)
Here is the caller graph for this function:

◆ merg_perms()

int_t * merg_perms ( int_t  nperms,
int_t nnodes,
int_t **  perms 
)
Here is the call graph for this function:

◆ num_full_cols_U()

int_t num_full_cols_U ( int_t  kk,
int_t **  Ufstnz_br_ptr,
int_t xsup,
gridinfo_t grid,
int_t perm_u,
int_t ldu 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ partition()

int_t partition ( int_t a,
int_t  l,
int_t  r,
int_t  dir 
)
Here is the caller graph for this function:

◆ partitionM()

int_t partitionM ( int_t a,
int_t  l,
int_t  r,
int_t  lda,
int_t  dir,
int_t  dims 
)
Here is the caller graph for this function:

◆ print_etree()

void print_etree ( int_t setree,
int_t iperm,
int_t  nsuper 
)

◆ print_etree_leveled()

void print_etree_leveled ( int_t setree,
int_t tsort_etree,
int_t  nsuper 
)

◆ Print_EtreeLevelBoundry()

void Print_EtreeLevelBoundry ( int_t Etree_LvlBdry,
int_t  max_level,
int_t  nsuper 
)

◆ print_memorylog()

void print_memorylog ( SuperLUStat_t stat,
char *  msg 
)

◆ print_options_dist()

void print_options_dist ( superlu_dist_options_t options)

Print the options setting.

Here is the call graph for this function:

◆ print_panel_seg_dist()

void print_panel_seg_dist ( int_t  n,
int_t  w,
int_t  jcol,
int_t  nseg,
int_t segrep,
int_t repfnz 
)

Diagnostic print of segment info after panel_dfs().

◆ print_sp_ienv_dist()

void print_sp_ienv_dist ( superlu_dist_options_t options)

Print the blocking parameters.

Here is the call graph for this function:

◆ PrintDouble5()

void PrintDouble5 ( char *  ,
int_t  ,
double *   
)

◆ printFileList()

int_t printFileList ( char *  sname,
int_t  nnodes,
int_t dlist,
int_t setree 
)

◆ printForestWeightCost()

void printForestWeightCost ( sForest_t **  sForests,
SCT_t SCT,
gridinfo3d_t grid3d 
)
Here is the call graph for this function:

◆ PrintInt10()

void PrintInt10 ( char *  name,
int_t  len,
int_t x 
)

◆ PrintInt32()

void PrintInt32 ( char *  name,
int  len,
int *  x 
)

◆ printTRStimer()

void printTRStimer ( xtrsTimer_t xtrsTimer,
gridinfo3d_t grid3d 
)

◆ PStatClear()

void PStatClear ( SuperLUStat_t stat)
Here is the caller graph for this function:

◆ PStatFree()

void PStatFree ( SuperLUStat_t stat)

◆ PStatInit()

void PStatInit ( SuperLUStat_t stat)

◆ PStatPrint()

void PStatPrint ( superlu_dist_options_t options,
SuperLUStat_t stat,
gridinfo_t grid 
)
Here is the call graph for this function:

◆ psymbfact_LUXpand()

int_t psymbfact_LUXpand ( int_t  iam,
int_t  n,
int_t  fstVtxLvl_loc,
int_t  vtxXp,
int_t p_next,
int_t  min_new_len,
int_t  mem_type,
int_t  rout_type,
int_t  free_prev_mem,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)
Expand the data structures for L and U during the factorization.
Return value: SUCCES_RET - successful return
              ERROR_RET - error due to a memory allocation failure
Expand the data structures for L and U during the factorization.
Return value: SUCCES_RET - successful return
              ERROR_RET - error due to a memory allocation failure

Sherry: this function is used in the upper separator tree above the domains. It does not call 'expand()'

Here is the call graph for this function:
Here is the caller graph for this function:

◆ psymbfact_LUXpand_RL()

int_t psymbfact_LUXpand_RL ( int_t  iam,
int_t  n,
int_t  vtxXp,
int_t  next,
int_t  len_texp,
int_t  mem_type,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space

Sherry: this function calls psymbfact_LUXpandMem().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ psymbfact_LUXpandMem()

int_t psymbfact_LUXpandMem ( int  iam,
int_t  n,
int_t  vtxXp,
int_t  next,
int_t  min_new_len,
int  mem_type,
int  rout_type,
int  free_prev_mem,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when run out of space

Sherry: this function is used inside the domains.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ psymbfact_prLUXpand()

int_t psymbfact_prLUXpand ( int_t  iam,
int_t  min_new_len,
int  mem_type,
Llu_symbfact_t Llu_symbfact,
psymbfact_stat_t PS 
)
Expand the data structures for L and U pruned during the factorization.
Return value: SUCCES_RET - successful return
              ERROR_RET - error when run out of space
Expand the data structures for L and U pruned during the factorization.
Return value: SUCCES_RET - successful return
              ERROR_RET - error when run out of space

Sherry: this function calls 'expand()' directly.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ pxerr_dist()

void pxerr_dist ( char *  srname,
gridinfo_t grid,
int_t  info 
)
Here is the caller graph for this function:

◆ QuerySpace_dist()

int_t QuerySpace_dist ( int_t  n,
int_t  lsub_size,
Glu_freeable_t Glu_freeable,
superlu_dist_mem_usage_t mem_usage 
)
mem_usage consists of the following fields:
  • for_lu (float) The amount of space used in bytes for the L\U data structures.
  • total (float) The amount of space needed in bytes to perform factorization.
  • expansions (int) Number of memory expansions during the LU factorization.
Here is the caller graph for this function:

◆ quickSort()

void quickSort ( int_t a,
int_t  l,
int_t  r,
int_t  dir 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ quickSortM()

void quickSortM ( int_t a,
int_t  l,
int_t  r,
int_t  lda,
int_t  dir,
int_t  dims 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduceStat()

int_t reduceStat ( PhaseType  PHASE,
SuperLUStat_t stat,
gridinfo3d_t grid3d 
)

Reduce the statistics from all the grids before printing them out. See the definition of enum PhaseType in superlu_enum_consts.h.

◆ SCT_free()

void SCT_free ( SCT_t SCT)
Here is the caller graph for this function:

◆ SCT_init()

void SCT_init ( SCT_t SCT)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ SCT_print()

void SCT_print ( gridinfo_t grid,
SCT_t SCT 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ SCT_print3D()

void SCT_print3D ( gridinfo3d_t grid3d,
SCT_t SCT 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ SCT_printComm3D()

void SCT_printComm3D ( gridinfo3d_t grid3d,
SCT_t SCT 
)
Here is the caller graph for this function:

◆ set_default_options_dist()

void set_default_options_dist ( superlu_dist_options_t options)

Set the default values for the options argument.

◆ set_tag_ub()

int set_tag_ub ( void  )

◆ setree2list()

treeList_t * setree2list ( int_t  nsuper,
int_t setree 
)
Here is the call graph for this function:

◆ smach_dist()

float smach_dist ( char *  cmach)
Here is the caller graph for this function:

◆ sort_R_info()

int sort_R_info ( Remain_info_t Remain_info,
int  n 
)
Here is the call graph for this function:

◆ sort_R_info_elm()

int sort_R_info_elm ( Remain_info_t Remain_info,
int  n 
)
Here is the call graph for this function:

◆ sort_U_info()

int sort_U_info ( Ublock_info_t Ublock_info,
int  n 
)
Here is the call graph for this function:

◆ sort_U_info_elm()

int sort_U_info_elm ( Ublock_info_t Ublock_info,
int  n 
)
Here is the call graph for this function:

◆ sp_coletree_dist()

int sp_coletree_dist ( int_t acolst,
int_t acolend,
int_t arow,
int_t  nr,
int_t  nc,
int_t parent 
)

Nonsymmetric elimination tree.

     Find the elimination tree for A'*A.
     This uses something similar to Liu's algorithm. 
     It runs in time O(nz(A)*log n) and does not form A'*A.

     Input:
       Sparse matrix A.  Numeric values are ignored, so any
       explicit zeros are treated as nonzero.
     Output:
       Integer array of parents representing the elimination
       tree of the symbolic product A'*A.  Each vertex is a
       column of A, and nc means a root of the elimination forest.

     John R. Gilbert, Xerox, 10 Dec 1990
     Based on code by JRG dated 1987, 1988, and 1990.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sp_colorder()

void sp_colorder ( superlu_dist_options_t options,
SuperMatrix A,
int_t perm_c,
int_t etree,
SuperMatrix AC 
)
Purpose
=======

sp_colorder() permutes the columns of the original matrix. It performs
the following steps:

   1. Apply column permutation perm_c[] to A's column pointers to form AC;

   2. If options->Fact = DOFACT, then
      (1) Compute column elimination tree etree[] of AC'AC;
      (2) Post order etree[] to get a postordered elimination tree etree[],
          and a postorder permutation post[];
      (3) Apply post[] permutation to columns of AC;
      (4) Overwrite perm_c[] with the product perm_c * post.

Arguments
=========

options (input) superlu_dist_options_t*
        Specifies whether or not the elimination tree will be re-used.
        If options->Fact == DOFACT, this means first time factor A, 
        etree is computed and output.
        Otherwise, re-factor A, etree is input, unchanged on exit.

A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
        of the linear equations is A->nrow. Currently, the type of A can be:
        Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
        In the future, more general A can be handled.

perm_c  (input/output) int*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.
        If options->Fact == DOFACT, perm_c is both input and output.
        On output, it is changed according to a postorder of etree.
        Otherwise, perm_c is input.

etree   (input/output) int*
        Elimination tree of Pc*(A'+A)*Pc', dimension A->ncol.
        If options->Fact == DOFACT, etree is an output argument,
        otherwise it is an input argument.
        Note: etree is a vector of parent pointers for a forest whose
        vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.

AC      (output) SuperMatrix*
        The resulting matrix after applying the column permutation
        perm_c[] to matrix A. The type of AC can be:
        Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ sp_ienv_dist()

int sp_ienv_dist ( int  ispec,
superlu_dist_options_t options 
)

Purpose

sp_ienv_dist() is called to choose machine-dependent integer parameters for the local environment. See ISPEC for a description of the parameters.

This version provides a set of parameters which should give good,
but not optimal, performance on many of the currently available
computers. Users are encouraged to set the environment variable to change the tuning parameters for their particular machines.

Arguments

ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST.
= 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on the machine's cache characteristics.
= 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of their row structures.
= 3: the maximum size for a supernode, which must be greater than or equal to the relaxation parameter (see case 2);
= 4: the minimum row dimension for 2-D blocking to be used;
= 5: the minimum column dimension for 2-D blocking to be used;
= 6: the estimated fill factor for the adjacency structures of L and U, compared with A;
= 7: the minimum value of the product M*N*K for a GEMM call worth being offloaded to accelerator (e.g., GPU, Xeon Phi).
= 8: the maximum buffer size on GPU that can hold the "dC" matrix in the GEMM call for the Schur complement update. If this is too small, the Schur complement update will be done in multiple partitions, which may be slower.
= 9: number of GPU streams
= 10: whether to offload work to GPU or not

options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition and the solves are performed.

(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC
< 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.


Here is the call graph for this function:

◆ sp_symetree_dist()

int sp_symetree_dist ( int_t acolst,
int_t acolend,
int_t arow,
int_t  n,
int_t parent 
)

Symmetric elimination tree.

     p = spsymetree (A);

     Find the elimination tree for symmetric matrix A.
     This uses Liu's algorithm, and runs in time O(nz*log n).

     Input:
       Square sparse matrix A.  No check is made for symmetry;
       elements below and on the diagonal are ignored.
       Numeric values are ignored, so any explicit zeros are 
       treated as nonzero.
     Output:
       Integer array of parents representing the etree, with n
       meaning a root of the elimination forest.
     Note:  
       This routine uses only the upper triangle, while sparse
       Cholesky (as in spchol.c) uses only the lower.  Matlab's
       dense Cholesky uses only the upper.  This routine could
       be modified to use the lower triangle either by transposing
       the matrix or by traversing it by rows with auxiliary
       pointer and link arrays.

     John R. Gilbert, Xerox, 10 Dec 1990
     Based on code by JRG dated 1987, 1988, and 1990.
     Modified by X.S. Li, November 1999.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ static_partition()

int_t static_partition ( struct superlu_pair work_load,
int_t  nwl,
int_t partition,
int_t  ldp,
int_t sums,
int_t counts,
int  nprocs 
)
Here is the call graph for this function:

◆ super_stats_dist()

void super_stats_dist ( int_t  nsuper,
int_t xsup 
)
Here is the call graph for this function:

◆ superlu_abort_and_exit_dist()

void superlu_abort_and_exit_dist ( char *  msg)

◆ superlu_dist_GetVersionNumber()

int superlu_dist_GetVersionNumber ( int *  major,
int *  minor,
int *  bugfix 
)

◆ superlu_free_dist()

void superlu_free_dist ( void *  addr)

◆ superlu_gridexit()

void superlu_gridexit ( gridinfo_t grid)

◆ superlu_gridexit3d()

void superlu_gridexit3d ( gridinfo3d_t grid)

◆ superlu_gridinit()

void superlu_gridinit ( MPI_Comm  Bcomm,
int  nprow,
int  npcol,
gridinfo_t grid 
)

All processes in the MPI communicator must call this routine.

On output, if a process is not in the SuperLU group, the following values are assigned to it: grid->comm = MPI_COMM_NULL, grid->iam = -1.

Here is the call graph for this function:

◆ superlu_gridinit3d()

void superlu_gridinit3d ( MPI_Comm  Bcomm,
int  nprow,
int  npcol,
int  npdep,
gridinfo3d_t grid 
)

All processes in the MPI communicator must call this routine.

Here is the call graph for this function:

◆ superlu_gridmap()

void superlu_gridmap ( MPI_Comm  Bcomm,
int  nprow,
int  npcol,
int  usermap[],
int  ldumap,
gridinfo_t grid 
)

All processes in the MPI communicator must call this routine.

On output, if a process is not in the SuperLU group, the following values are assigned to it: grid->comm = MPI_COMM_NULL, grid->iam = -1.

Here is the caller graph for this function:

◆ superlu_gridmap3d()

void superlu_gridmap3d ( MPI_Comm  Bcomm,
int  nprow,
int  npcol,
int  npdep,
int  usermap[],
gridinfo3d_t grid 
)

All processes in the MPI communicator must call this routine. On output, if a process is not in the SuperLU group, the following values are assigned to it: grid->comm = MPI_COMM_NULL grid->iam = -1.

◆ superlu_malloc_dist()

void * superlu_malloc_dist ( size_t  size)

◆ supernodal_etree()

int_t * supernodal_etree ( int_t  nsuper,
int_t etree,
int_t supno,
int_t xsup 
)

Returns Supernodal Elimination Tree

Parameters
nsuper  Number of Supernodes
etree   Scalar elimination tree
supno   Vertex to supernode mapping
xsup    Supernodal boundaries
Returns
Supernodal elimination tree
Here is the call graph for this function:

◆ symbfact()

int_t symbfact ( superlu_dist_options_t options,
int  pnum,
SuperMatrix A,
int_t perm_c,
int_t etree,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)
Purpose
=======
  symbfact() performs a symbolic factorization on matrix A and sets up 
  the nonzero data structures which are suitable for supernodal Gaussian
  elimination with no pivoting (GENP). This routine features:
       o depth-first search (DFS)
       o supernodes
       o symmetric structure pruning

Return value
============
  < 0, number of bytes needed for LSUB.
  = 0, matrix dimension is 1.
  > 0, number of bytes allocated when out of memory.
Here is the call graph for this function:

◆ symbfact_dist()

float symbfact_dist ( superlu_dist_options_t options,
int  nprocs_num,
int  nprocs_symb,
SuperMatrix A,
int_t perm_c,
int_t perm_r,
int_t sizes,
int_t fstVtxSep,
Pslu_freeable_t Pslu_freeable,
MPI_Comm *  num_comm,
MPI_Comm *  symb_comm,
superlu_dist_mem_usage_t symb_mem_usage 
)
 
Purpose
=======
  symbfact_dist() performs symbolic factorization of matrix A suitable
  for performing the supernodal Gaussian elimination with no pivoting (GENP). 
  This routine computes the structure of one column of L and one row of U 
  at a time.  It uses:
       o distributed input matrix
       o supernodes
       o symmetric structure pruning


Arguments
=========

nprocs_num (input) int
        Number of processors SuperLU_DIST is executed on, and the input 
        matrix is distributed on.

nprocs_symb (input) int
        Number of processors on which the symbolic factorization is
        performed.  It is equal to the number of independent domains
        identified in the graph partitioning algorithm executed
        previously and has to be a power of 2.  It corresponds to
        number of leaves in the separator tree.

A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The
        number of the linear equations is A->nrow.  Matrix A is
        distributed in NRformat_loc format.
        Matrix A is not yet permuted by perm_c.

perm_c  (input) int_t*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.

perm_r  (input) int_t*
    Row permutation vector of size A->nrow, which defines the 
        permutation matrix Pr; perm_r[i] = j means row i of A is 
        in position j in Pr*A.

sizes   (input) int_t*
        Contains the number of vertices in each separator.

fstVtxSep (input) int_t*
        Contains first vertex for each separator.

Pslu_freeable (output) Pslu_freeable_t*
        Returns the local L and U structure, and global to local
        information on the indexing of the vertices.  Contains all
        the information necessary for performing the data
        distribution towards the numeric factorization.

num_comm (input) MPI_Comm*
        Communicator for numerical factorization 

symb_comm (input) MPI_Comm*
        Communicator for symbolic factorization 

symb_mem_usage (input) superlu_dist_mem_usage_t *
        Statistics on memory usage.

Return value
============
  < 0, number of bytes allocated on return from the symbolic factorization.
  > 0, number of bytes allocated when out of memory.

Sketch of the algorithm
=======================

 Distribute the vertices on the processors using a subtree to
 subcube algorithm.

 Redistribute the structure of the input matrix A according to the
 subtree to subcube computed previously for the symbolic
 factorization routine.  This implies in particular a distribution
 from nprocs_num processors to nprocs_symb processors.

 Perform symbolic factorization guided by the separator tree provided by
 a graph partitioning algorithm.  The symbolic factorization uses a 
 combined left-looking, right-looking approach. 
 
Purpose
=======
  symbfact_dist() performs symbolic factorization of matrix A suitable
  for performing the supernodal Gaussian elimination with no pivoting (GENP). 
  This routine computes the structure of one column of L and one row of U 
  at a time.  It uses:
       o distributed input matrix
       o supernodes
       o symmetric structure pruning


Arguments
=========

nprocs_num (input) int
        Number of processors SuperLU_DIST is executed on, and the input 
        matrix is distributed on.

nprocs_symb (input) int
        Number of processors on which the symbolic factorization is
        performed.  It is equal to the number of independent domains
        identified in the graph partitioning algorithm executed
        previously and has to be a power of 2.  It corresponds to
        number of leaves in the separator tree.

A       (input) SuperMatrix*
        Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The
        number of the linear equations is A->nrow.  Matrix A is
        distributed in NRformat_loc format.
        Matrix A is not yet permuted by perm_c.

perm_c  (input) int_t*
    Column permutation vector of size A->ncol, which defines the 
        permutation matrix Pc; perm_c[i] = j means column i of A is 
        in position j in A*Pc.

perm_r  (input) int_t*
    Row permutation vector of size A->nrow, which defines the 
        permutation matrix Pr; perm_r[i] = j means row i of A is 
        in position j in Pr*A.

sizes   (input) int_t*
        Contains the number of vertices in each separator.

fstVtxSep (input) int_t*
        Contains first vertex for each separator.

Pslu_freeable (output) Pslu_freeable_t*
        Returns the local L and U structure, and global to local
        information on the indexing of the vertices.  Contains all
        the information necessary for performing the data
        distribution towards the numeric factorization.

num_comm (input) MPI_Comm*
        Communicator for numerical factorization 

symb_comm (input) MPI_Comm*
        Communicator for symbolic factorization 

symb_mem_usage (input) superlu_dist_mem_usage_t *
        Statistics on memory usage.

Return value
============
  < 0, number of bytes allocated on return from the symbolic factorization.
  > 0, number of bytes allocated when out of memory.

Sketch of the algorithm
=======================

 Distribute the vertices on the processors using a subtree to
 subcube algorithm.

 Redistribute the structure of the input matrix A according to the
 subtree to subcube computed previously for the symbolic
 factorization routine.  This implies in particular a distribution
 from nprocs_num processors to nprocs_symb processors.

 Perform symbolic factorization guided by the separator tree provided by
 a graph partitioning algorithm.  The symbolic factorization uses a 
 combined left-looking, right-looking approach. 
Here is the call graph for this function:

◆ symbfact_SubFree()

int_t symbfact_SubFree ( Glu_freeable_t Glu_freeable)
Deallocate storage of the data structures common to symbolic
factorization routines.
Here is the caller graph for this function:

◆ symbfact_SubInit()

int_t symbfact_SubInit ( superlu_dist_options_t *  options,
fact_t  fact,
void *  work,
int_t  lwork,
int_t  m,
int_t  n,
int_t  annz,
Glu_persist_t *  Glu_persist,
Glu_freeable_t *  Glu_freeable 
)
Allocate storage for the data structures common to symbolic factorization
routines. For data structures of unpredictable size, make a guess of FILL * nnz(A).
Return value:
    If lwork = -1, return the estimated amount of space required, plus n;
    otherwise, return the amount of space actually allocated when
    memory allocation failure occurred.
Here is the call graph for this function:

◆ symbfact_SubXpand()

int_t symbfact_SubXpand ( int_t  n,
int_t  jcol,
int_t  next,
MemType  mem_type,
int_t *  maxlen,
Glu_freeable_t *  Glu_freeable 
)
Expand the data structures for L and U during the factorization.
Return value:   0 - successful return
              > 0 - number of bytes allocated when running out of space
Here is the call graph for this function:
Here is the caller graph for this function:

◆ Test_LDiagBlock_Recv()

int_t Test_LDiagBlock_Recv ( MPI_Request *  request,
SCT_t *  SCT 
)
Here is the caller graph for this function:

◆ Test_UDiagBlock_Recv()

int_t Test_UDiagBlock_Recv ( MPI_Request *  request,
SCT_t *  SCT 
)
Here is the caller graph for this function:

◆ testListPerm()

int_t testListPerm ( int_t  nodeCount,
int_t *  nodeList,
int_t *  permList,
int_t *  gTopLevel 
)
Here is the call graph for this function:

◆ testSubtreeNodelist()

int_t testSubtreeNodelist ( int_t  nsupers,
int_t  numList,
int_t **  nodeList,
int_t *  nodeCount 
)
Here is the call graph for this function:

◆ topological_ordering()

int_t * topological_ordering ( int_t  nsuper,
int_t *  setree 
)
Here is the call graph for this function:

◆ treeImbalance3D()

void treeImbalance3D ( gridinfo3d_t *  grid3d,
SCT_t *  SCT 
)
Here is the call graph for this function:

◆ TreePostorder_dist()

int_t * TreePostorder_dist ( int_t  n,
int_t *  parent 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ Trs2_InitUblock_info()

int_t Trs2_InitUblock_info ( int_t  klst,
int_t  nb,
Ublock_info_t *  Ublock_info,
int_t *  usub,
Glu_persist_t *  Glu_persist,
SuperLUStat_t *  stat 
)

◆ Wait_LDiagBlock_Recv()

int_t Wait_LDiagBlock_Recv ( MPI_Request *  request,
SCT_t *  SCT 
)

◆ Wait_LDiagBlockSend()

int_t Wait_LDiagBlockSend ( MPI_Request *  L_diag_blk_send_req,
gridinfo_t *  grid,
SCT_t *  SCT 
)
Here is the caller graph for this function:

◆ Wait_LSend()

int_t Wait_LSend ( int_t  k,
gridinfo_t *  grid,
int **  ToSendR,
MPI_Request *  s,
SCT_t *  SCT 
)

◆ Wait_LUDiagSend()

int Wait_LUDiagSend ( int_t  k,
MPI_Request *  U_diag_blk_send_req,
MPI_Request *  L_diag_blk_send_req,
gridinfo_t *  grid,
SCT_t *  SCT 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ Wait_UDiagBlock_Recv()

int_t Wait_UDiagBlock_Recv ( MPI_Request *  request,
SCT_t *  SCT 
)

◆ Wait_UDiagBlockSend()

int_t Wait_UDiagBlockSend ( MPI_Request *  U_diag_blk_send_req,
gridinfo_t *  grid,
SCT_t *  SCT 
)
Here is the caller graph for this function:

◆ Wait_USend()

int_t Wait_USend ( MPI_Request *  send_req,
gridinfo_t *  grid,
SCT_t *  SCT 
)

◆ xerr_dist()

int xerr_dist ( char *  srname,
int *  info 
)

Variable Documentation

◆ BC_L

const int BC_L =1
static

◆ BC_U

const int BC_U =3
static

◆ RD_L

const int RD_L =2
static

◆ RD_U

const int RD_U =4
static