69#if defined(VTUNE) && VTUNE>=1
86#define SUPERLU_DIST_MAJOR_VERSION 8
87#define SUPERLU_DIST_MINOR_VERSION 2
88#define SUPERLU_DIST_PATCH_VERSION 1
89#define SUPERLU_DIST_RELEASE_DATE "November 17, 2023"
105#include "gpu_api_utils.h"
113 #define mpi_int_t MPI_SHORT
114#elif defined (_LONGINT)
116 #define mpi_int_t MPI_LONG_LONG_INT
120 #define mpi_int_t MPI_INT
126#define SuperLU_MPI_COMPLEX MPI_C_COMPLEX
127#define SuperLU_MPI_DOUBLE_COMPLEX MPI_C_DOUBLE_COMPLEX
142#define MAX_SUPER_SIZE 512
196#define LB_DESCRIPTOR 2
198#define UB_DESCRIPTOR 2
199#define BC_HEADER_NEWU 3
200#define UB_DESCRIPTOR_NEWU 2
243#define COMM_COLUMN 101
249#define SUPER_LINEAR 11
250#define SUPER_BLOCK 12
262#define IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank};
263#define MYROW(iam,grid) ( (iam) / grid->npcol )
264#define MYCOL(iam,grid) ( (iam) % grid->npcol )
265#define BlockNum(i) ( supno[i] )
266#define FstBlockC(bnum) ( xsup[bnum] )
267#define SuperSize(bnum) ( xsup[bnum+1]-xsup[bnum] )
268#define LBi(bnum,grid) ( (bnum)/grid->nprow )
269#define LBj(bnum,grid) ( (bnum)/grid->npcol )
270#define PROW(bnum,grid) ( (bnum) % grid->nprow )
271#define PCOL(bnum,grid) ( (bnum) % grid->npcol )
272#define PNUM(i,j,grid) ( (i)*grid->npcol + j )
273#define CEILING(a,b) ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
275#define RHS_ITERATE(i) \
276 for (i = 0; i < nrhs; ++i)
278 ilsum[i] * nrhs + (i+1) * XK_H
280 ilsum[i] * nrhs + (i+1) * LSUM_H
282#define SuperLU_timer_ SuperLU_timer_dist_
283#define LOG2(x) (log10((double) x) / log10(2.0))
285#if defined(VAMPIR) && VAMPIR>=1
286#define VT_TRACEON VT_traceon()
287#define VT_TRACEOFF VT_traceoff()
294#ifndef SUPERLU_DIST_EXPORT
295#if defined(MSVC) && MSVC
296#ifdef SUPERLU_DIST_EXPORTS
297#define SUPERLU_DIST_EXPORT __declspec(dllexport)
299#define SUPERLU_DIST_EXPORT __declspec(dllimport)
302#define SUPERLU_DIST_EXPORT
322#define BLK_K 2048/(BLK_M)
329#define NWARP DIM_X*DIM_Y/32
334#define THR_M ( BLK_M / DIM_X )
335#define THR_N ( BLK_N / DIM_Y )
337#define fetch(A, m, n, bound) offs_d##A[min(n*LD##A+m, bound)]
338#define fma(A, B, C) C += (A*B)
348 #define cmax(a,b) ((a) > (b) ? (a) : (b))
561 int *ptr_to_ibuf, *ptr_to_dbuf;
564 int *X_to_B_iSendCnt;
565 int *X_to_B_vSendCnt;
726 double DiagPivotThresh;
733 double ILU_FillFactor;
749 char superlu_rankorder[4];
845 int_t first_l_block_acc , first_u_block_acc;
847 int_t *Lblock_dirty_bit, * Ublock_dirty_bit;
851 int_t num_look_aheads, nsupers;
853 int_t num_u_blks, num_u_blks_Phi;
886#define SLU_MPI_TAG(id,num) ( (6*(num)+id) % tag_ub )
910 int_t *iperm_c_supno;
920 int_t* IbcastPanel_L;
921 int_t* IbcastPanel_U;
945 int_t numDescendents;
1001 MPI_Request* L_diag_blk_recv_req;
1002 MPI_Request* L_diag_blk_send_req;
1003 MPI_Request* U_diag_blk_recv_req;
1004 MPI_Request* U_diag_blk_send_req;
1005 MPI_Request* recv_req;
1006 MPI_Request* recv_requ;
1007 MPI_Request* send_req;
1008 MPI_Request* send_requ;
1013 int_t *iperm_c_supno;
1092extern void bcast_tree(
void *,
int, MPI_Datatype,
int,
int,
1178int superlu_sort_perm (
const void *arg1,
const void *arg2)
1182 return (*val2 < *val1);
1188 int *,
int *,
int *,
int,
1189 int,
int,
int *,
int,
int_t);
1223#ifndef __SUPERLU_ASYNC_TREE
1224#define __SUPERLU_ASYNC_TREE
1227 MPI_Request sendRequests_[2];
1245extern void C_RdTree_Create(
C_Tree* tree, MPI_Comm comm,
int* ranks,
int rank_cnt,
int msgSize,
char precision);
1251extern void C_BcTree_Create(
C_Tree* tree, MPI_Comm comm,
int* ranks,
int rank_cnt,
int msgSize,
char precision);
1267 int_t** Lrowind_bc_ptr,
int_t** Ufstnz_br_ptr,
1363 int_t** Lrowind_bc_ptr,
int_t** Ufstnz_br_ptr,
1366 MPI_Request *L_diag_blk_send_req,
1422 MPI_Request *s,
SCT_t*);
void superlu_free_dist(void *)
Definition: memory.c:168
void ifill_dist(int_t *, int_t, int_t)
Fills an integer array with a given value.
Definition: util.c:488
int_t Wait_USend(MPI_Request *, gridinfo_t *, SCT_t *)
Definition: communication_aux.c:56
void superlu_gridinit3d(MPI_Comm Bcomm, int nprow, int npcol, int npdep, gridinfo3d_t *grid)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid3d.c:25
void PrintDouble5(char *, int_t, double *)
int_t num_full_cols_U(int_t kk, int_t **Ufstnz_br_ptr, int_t *xsup, gridinfo_t *, int_t *, int_t *)
Definition: util.c:944
int_t ** getTreePermForest(int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t *sForests, int_t *perm_c_supno, int_t *iperm_c_supno, gridinfo3d_t *grid3d)
void print_options_dist(superlu_dist_options_t *)
Print the options setting.
Definition: util.c:228
void C_BcTree_forwardMessageSimple(C_Tree *tree, void *localBuffer, int msgSize)
Definition: comm_tree.c:75
treePartStrat
Definition: superlu_defs.h:895
@ GD
Definition: superlu_defs.h:897
@ ND
Definition: superlu_defs.h:896
int getNsupers(int n, Glu_persist_t *Glu_persist)
Definition: trfAux.c:42
void superlu_gridexit3d(gridinfo3d_t *grid)
Definition: superlu_grid3d.c:256
int xerr_dist(char *, int *)
Definition: xerr_dist.c:26
void treeImbalance3D(gridinfo3d_t *grid3d, SCT_t *SCT)
Definition: sec_structs.c:532
int_t psymbfact_LUXpand_RL(int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:384
void print_etree(int_t *setree, int_t *iperm, int_t nsuper)
Definition: supernodal_etree.c:1045
int_t log2i(int_t index)
Definition: supernodal_etree.c:17
int_t partitionM(int_t *, int_t, int_t, int_t, int_t, int_t)
Definition: util.c:1125
void Destroy_CompCol_Matrix_dist(SuperMatrix *)
Definition: util.c:34
int_t getDescendList(int_t k, int_t *dlist, treeList_t *treeList)
Definition: supernodal_etree.c:259
void arrive_at_ublock(int_t, int_t *, int_t *, int_t *, int_t *, int_t *, int_t, int_t, int_t *, int_t *, int_t *, gridinfo_t *)
Definition: util.c:890
int_t * getMyNodeCountsFr(int_t maxLvl, int_t *myTreeIdxs, sForest_t **sForests)
Definition: supernodalForest.c:276
int_t Wait_UDiagBlockSend(MPI_Request *, gridinfo_t *, SCT_t *)
Definition: communication_aux.c:112
int_t * getMyTopOrder(int_t nnodes, int_t *myPerm, int_t *myIperm, int_t *setree)
Definition: supernodal_etree.c:852
int_t * getFactPerm(int_t)
Definition: trfAux.c:208
int free_treelist(int_t nsuper, treeList_t *treeList)
Definition: supernodal_etree.c:114
void C_BcTree_Nullify(C_Tree *tree)
Definition: comm_tree.c:56
int_t * getEtreeLB(int_t nnodes, int_t *perm_l, int_t *gTopOrder)
Definition: supernodal_etree.c:339
int_t initCommRequests(commRequests_t *comReqs, gridinfo_t *grid)
Definition: treeFactorization.c:227
int_t * calcNumNodes(int_t maxLvl, int_t *treeHeads, treeList_t *treeList)
Definition: supernodal_etree.c:733
int_t * getFactIperm(int_t *, int_t)
Definition: trfAux.c:221
void DistPrint(char *function_name, double value, char *Units, gridinfo_t *grid)
Definition: sec_structs.c:313
int_t * getPerm_c_supno(int_t nsupers, superlu_dist_options_t *, int_t *etree, Glu_persist_t *Glu_persist, int_t **Lrowind_bc_ptr, int_t **Ufstnz_br_ptr, gridinfo_t *)
Definition: trfAux.c:234
int_t * intMalloc_dist(int_t)
Definition: memory.c:219
int_t reduceStat(PhaseType PHASE, SuperLUStat_t *stat, gridinfo3d_t *grid3d)
Definition: util.c:1256
int_t * merg_perms(int_t nperms, int_t *nnodes, int_t **perms)
Definition: supernodal_etree.c:482
int sp_ienv_dist(int)
Definition: sp_ienv.c:73
int_t static_partition(struct superlu_pair *, int_t, int_t *, int_t, int_t *, int_t *, int)
Definition: util.c:864
int_t Wait_LDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:172
sForest_t ** getNestDissForests(int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
Definition: supernodalForest.c:62
int_t ** getNodeListFr(int_t maxLvl, sForest_t **sForests)
Definition: supernodalForest.c:232
void SCT_free(SCT_t *)
Definition: sec_structs.c:294
int_t get_min(int_t *, int_t)
Definition: util.c:847
double estimate_cpu_time(int m, int n, int k)
Definition: acc_aux.c:214
void countnz_dist(const int_t, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: util.c:95
int sort_R_info(Remain_info_t *Remain_info, int n)
Definition: sec_structs.c:54
int_t * getTreeHeads(int_t maxLvl, int_t nsupers, treeList_t *treeList)
Definition: supernodal_etree.c:705
void log_memory(int64_t, SuperLUStat_t *)
Definition: util.c:783
int freeMsgsArr(int_t numLA, msgs_t **msgss)
Definition: treeFactorization.c:356
sForest_t ** getForests(int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
Definition: supernodalForest.c:29
int_t getCommonAncestorList(int_t k, int_t *alist, int_t *seTree, treeList_t *treeList)
Definition: supernodal_etree.c:290
void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *)
Diagnostic print of segment info after panel_dfs().
Definition: util.c:276
void superlu_abort_and_exit_dist(char *)
Definition: memory.c:48
int sort_R_info_elm(Remain_info_t *Remain_info, int n)
Definition: sec_structs.c:82
void Destroy_SuperNode_Matrix_dist(SuperMatrix *)
Definition: util.c:61
int_t initFactStat(int_t nsupers, factStat_t *factStat)
Definition: treeFactorization.c:274
int_t * getReplicatedTrees(gridinfo3d_t *grid3d)
Definition: supernodal_etree.c:815
void SCT_init(SCT_t *)
Definition: sec_structs.c:165
int sp_symetree_dist(int_t *, int_t *, int_t *, int_t, int_t *)
Symmetric elimination tree.
Definition: etree.c:156
int_t getBigUSize(int_t nsupers, gridinfo_t *grid, int_t **Lrowind_bc_ptr)
Definition: trfAux.c:162
int_t symbfact_SubXpand(int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *)
Definition: memory.c:433
void C_BcTree_Create(C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision)
Definition: comm_tree.c:5
int_t symbfact_SubFree(Glu_freeable_t *)
Definition: memory.c:479
int freeFactNodelists(factNodelists_t *fNlists)
Definition: treeFactorization.c:327
float smach_dist(char *)
Definition: smach_dist.c:16
int_t ** getTreePerm(int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, int_t *perm_c_supno, int_t *iperm_c_supno, gridinfo3d_t *grid3d)
Definition: util.c:1190
int compare_pair(const void *, const void *)
Definition: util.c:799
void Destroy_CompCol_Permuted_dist(SuperMatrix *)
A is of type Stype==NCP.
Definition: util.c:73
int file_PrintInt10(FILE *, char *, int_t, int_t *)
Definition: util.c:628
int_t partition(int_t *, int_t, int_t, int_t)
Definition: util.c:1057
void super_stats_dist(int_t, int_t *)
Definition: util.c:542
int_t * getMyEtLims(int_t nnodes, int_t *myTopOrder)
Definition: supernodal_etree.c:886
void PStatPrint(superlu_dist_options_t *, SuperLUStat_t *, gridinfo_t *)
Definition: util.c:308
void get_perm_c_dist(int_t, int_t, SuperMatrix *, int_t *)
Definition: get_perm_c.c:464
int_t * getNodeCountsFr(int_t maxLvl, sForest_t **sForests)
Definition: supernodalForest.c:214
yes_no_t C_BcTree_IsRoot(C_Tree *tree)
Definition: comm_tree.c:71
void PStatInit(SuperLUStat_t *)
Definition: util.c:290
void C_RdTree_waitSendRequest(C_Tree *Tree)
Definition: comm_tree.c:186
void print_sp_ienv_dist(superlu_dist_options_t *)
Print the blocking parameters.
Definition: util.c:252
int_t testSubtreeNodelist(int_t nsupers, int_t numList, int_t **nodeList, int_t *nodeCount)
Definition: supernodal_etree.c:407
int_t symbfact(superlu_dist_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: symbfact.c:82
int Wait_LUDiagSend(int_t k, MPI_Request *U_diag_blk_send_req, MPI_Request *L_diag_blk_send_req, gridinfo_t *grid, SCT_t *SCT)
Definition: communication_aux.c:195
int sort_U_info_elm(Ublock_info_t *Ublock_info, int n)
Definition: sec_structs.c:90
void PStatFree(SuperLUStat_t *)
Definition: util.c:480
float get_perm_c_parmetis(SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *)
Definition: get_perm_c_parmetis.c:104
int mc64id_dist(int *)
Definition: mc64ad_dist.c:57
int_t QuerySpace_dist(int_t, int_t, Glu_freeable_t *, superlu_dist_mem_usage_t *)
Definition: memory.c:617
int_t Test_UDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:161
void at_plus_a_dist(const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **)
Definition: get_perm_c.c:301
double dmach_dist(char *)
Definition: dmach_dist.c:16
void get_diag_procs(int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **)
Definition: util.c:495
int sp_coletree_dist(int_t *, int_t *, int_t *, int_t, int_t, int_t *)
Nonsymmetric elimination tree.
Definition: etree.c:223
int_t Wait_LDiagBlockSend(MPI_Request *, gridinfo_t *, SCT_t *)
Definition: communication_aux.c:132
int_t Trs2_InitUblock_info(int_t klst, int_t nb, Ublock_info_t *, int_t *usub, Glu_persist_t *, SuperLUStat_t *)
Definition: trfAux.c:1172
int_t ** getNodeList(int_t maxLvl, int_t *setree, int_t *nnodes, int_t *treeHeads, treeList_t *treeList)
Definition: supernodal_etree.c:759
int * int32Calloc_dist(int)
Definition: memory.c:209
int Cmpfunc_U_info(const void *a, const void *b)
Definition: sec_structs.c:48
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:385
int superlu_dist_GetVersionNumber(int *, int *, int *)
Definition: superlu_dist_version.c:22
int_t Check_LRecv(MPI_Request *, int *msgcnt)
Definition: communication_aux.c:79
struct xtrsTimer_t xtrsTimer_t
int set_tag_ub()
Definition: trfAux.c:48
void C_RdTree_forwardMessageSimple(C_Tree *Tree, void *localBuffer, int msgSize)
Definition: comm_tree.c:169
int get_acc_offload()
Definition: sec_structs.c:582
void printForestWeightCost(sForest_t **sForests, SCT_t *SCT, gridinfo3d_t *grid3d)
Definition: supernodalForest.c:352
yes_no_t C_RdTree_IsRoot(C_Tree *tree)
Definition: comm_tree.c:164
int * int32Malloc_dist(int)
Definition: memory.c:202
void Destroy_CompRow_Matrix_dist(SuperMatrix *)
Definition: util.c:53
void PrintInt32(char *, int, int *)
Definition: util.c:614
int_t LDiagBlockRecvWait(int_t k, int_t *factored_U, MPI_Request *, gridinfo_t *)
Definition: communication_aux.c:218
float symbfact_dist(int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, superlu_dist_mem_usage_t *)
Definition: psymbfact.c:142
void getSCUweight(int_t nsupers, treeList_t *treeList, int_t *xsup, int_t **Lrowind_bc_ptr, int_t **Ufstnz_br_ptr, gridinfo3d_t *grid3d)
Definition: trfAux.c:1205
treeList_t * setree2list(int_t nsuper, int_t *setree)
Definition: supernodal_etree.c:71
int_t Wait_UDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:152
int_t checkIntVector3d(int_t *vec, int_t len, gridinfo3d_t *grid3d)
Definition: util.c:1219
int_t ** getTreePermFr(int_t *myTreeIdxs, sForest_t **sForests, gridinfo3d_t *grid3d)
Definition: supernodalForest.c:290
void print_memorylog(SuperLUStat_t *, char *)
Definition: util.c:793
int64_t fixupL_dist(const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: util.c:158
void printTRStimer(xtrsTimer_t *xtrsTimer, gridinfo3d_t *grid3d)
int_t * getSubTreeRoots(int_t k, treeList_t *treeList)
Definition: supernodal_etree.c:378
void SCT_print(gridinfo_t *grid, SCT_t *SCT)
Definition: sec_structs.c:433
void superlu_gridmap(MPI_Comm, int, int, int[], int, gridinfo_t *)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid.c:87
int_t calcTreeWeight(int_t nsupers, int_t *setree, treeList_t *treeList, int_t *xsup)
Definition: supernodal_etree.c:183
int get_mpi_process_per_gpu()
Definition: util.c:1528
int_t * getGridTrees(gridinfo3d_t *grid3d)
Definition: supernodal_etree.c:802
int_t * topological_ordering(int_t nsuper, int_t *setree)
Definition: supernodal_etree.c:54
int_t * calculate_num_children(int_t nsuper, int_t *setree)
Definition: supernodal_etree.c:958
int * getIsNodeInMyGrid(int_t nsupers, int_t maxLvl, int_t *myNodeCount, int_t **treePerm)
Definition: supernodalForest.c:305
void pxerr_dist(char *, gridinfo_t *, int_t)
Definition: pxerr_dist.c:27
int_t * getPermNodeList(int_t nnode, int_t *nlist, int_t *perm_c_sup, int_t *iperm_c_sup)
Definition: supernodal_etree.c:317
int_t * getMyNodeCounts(int_t maxLvl, int_t *myTreeIdxs, int_t *gNodeCount)
Definition: util.c:1208
int * getLastDepBtree(int_t nsupers, treeList_t *treeList)
int freeCommRequestsArr(int_t mxLeafNode, commRequests_t **comReqss)
Definition: treeFactorization.c:257
int getNumThreads(int)
Definition: trfAux.c:61
int_t psymbfact_LUXpandMem(int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:91
int file_PrintInt32(FILE *, char *, int, int *)
Definition: util.c:643
int_t testListPerm(int_t nodeCount, int_t *nodeList, int_t *permList, int_t *gTopLevel)
Definition: supernodal_etree.c:436
int_t * getMyIperm(int_t nnodes, int_t nsupers, int_t *myPerm)
Definition: supernodal_etree.c:835
int sort_U_info(Ublock_info_t *Ublock_info, int n)
Definition: sec_structs.c:61
int file_PrintLong10(FILE *, char *, int_t, int_t *)
void PrintInt10(char *, int_t, int_t *)
Definition: util.c:600
treeTopoInfo_t getMyTreeTopoInfo(int_t nnodes, int_t nsupers, int_t *myPerm, int_t *setree)
Definition: supernodal_etree.c:913
void superlu_gridexit(gridinfo_t *)
Definition: superlu_grid.c:200
int_t printFileList(char *sname, int_t nnodes, int_t *dlist, int_t *setree)
Definition: supernodal_etree.c:241
int_t initMsgs(msgs_t *msgs)
Definition: treeFactorization.c:336
int int_t
Definition: superlu_defs.h:114
void DistPrint3D(char *function_name, double value, char *Units, gridinfo3d_t *grid3d)
Definition: sec_structs.c:341
int_t symbfact_SubInit(fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *)
Definition: memory.c:304
int_t psymbfact_LUXpand(int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:213
void SCT_printComm3D(gridinfo3d_t *grid3d, SCT_t *SCT)
Definition: sec_structs.c:563
void C_RdTree_Nullify(C_Tree *tree)
Definition: comm_tree.c:148
void C_BcTree_waitSendRequest(C_Tree *tree)
Definition: comm_tree.c:90
void initTRStimer(xtrsTimer_t *xtrsTimer, gridinfo_t *grid)
void SCT_print3D(gridinfo3d_t *grid3d, SCT_t *SCT)
Definition: sec_structs.c:508
void * superlu_malloc_dist(size_t)
Definition: memory.c:163
void isort1(int_t N, int_t *ARRAY)
Definition: util.c:738
int_t * TreePostorder_dist(int_t, int_t *)
Definition: etree.c:393
int_t getCommonAncsCount(int_t k, treeList_t *treeList)
Definition: supernodal_etree.c:278
int_t psymbfact_prLUXpand(int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:502
commRequests_t ** initCommRequestsArr(int_t mxLeafNode, int_t ldt, gridinfo_t *grid)
Definition: treeFactorization.c:243
int_t initFactNodelists(int_t, int_t, int_t, factNodelists_t *)
Definition: treeFactorization.c:312
void Destroy_SuperMatrix_Store_dist(SuperMatrix *)
Deallocate the structure pointing to the actual storage of the matrix.
Definition: util.c:29
void Print_EtreeLevelBoundry(int_t *Etree_LvlBdry, int_t max_level, int_t nsuper)
Definition: supernodal_etree.c:974
void quickSortM(int_t *, int_t, int_t, int_t, int_t, int_t)
Definition: util.c:1109
int_t Wait_LSend(int_t k, gridinfo_t *grid, int **ToSendR, MPI_Request *s, SCT_t *)
Definition: communication_aux.c:32
int_t get_max_buffer_size()
Definition: util.c:815
void C_RdTree_Create(C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision)
Definition: comm_tree.c:100
int get_thread_per_process()
Definition: util.c:804
int_t * intCalloc_dist(int_t)
Definition: memory.c:226
int Cmpfunc_R_info(const void *a, const void *b)
Definition: sec_structs.c:41
int_t * Etree_LevelBoundry(int_t *perm, int_t *tsort_etree, int_t nsuper)
Definition: supernodal_etree.c:927
void set_default_options_dist(superlu_dist_options_t *)
Set the default values for the options argument.
Definition: util.c:198
int genmmd_dist_(int_t *, int_t *, int_t *a, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *)
Definition: mmd.c:64
void isort(int_t N, int_t *ARRAY1, int_t *ARRAY2)
Definition: util.c:687
int_t * getGlobal_iperm(int_t nsupers, int_t nperms, int_t **perms, int_t *nnodes)
Definition: supernodal_etree.c:679
void quickSort(int_t *, int_t, int_t, int_t)
Definition: util.c:1044
msgs_t ** initMsgsArr(int_t numLA)
Definition: treeFactorization.c:343
void print_etree_leveled(int_t *setree, int_t *tsort_etree, int_t nsuper)
Definition: supernodal_etree.c:986
int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *)
Definition: util.c:658
int_t Test_LDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:182
void sp_colorder(superlu_dist_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *)
Definition: sp_colorder.c:81
void check_repfnz_dist(int_t, int_t, int_t, int_t *)
Check whether repfnz[] == EMPTY after reset.
Definition: util.c:586
void superlu_gridinit(MPI_Comm, int, int, gridinfo_t *)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid.c:37
sForest_t ** getGreedyLoadBalForests(int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
Definition: supernodalForest.c:794
int_t estimate_bigu_size(int_t, int_t **, Glu_persist_t *, gridinfo_t *, int_t *, int_t *)
Definition: util.c:991
void Destroy_CompRowLoc_Matrix_dist(SuperMatrix *)
Definition: util.c:44
int freeFactStat(factStat_t *factStat)
Definition: treeFactorization.c:299
int getnGPUStreams()
Definition: util.c:1512
int_t * supernodal_etree(int_t nsuper, int_t *etree, int_t *supno, int_t *xsup)
Definition: supernodal_etree.c:32
void bcast_tree(void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *)
Definition: comm.c:72
trans_t
Definition: superlu_enum_consts.h:34
milu_t
Definition: superlu_enum_consts.h:46
LU_space_t
Definition: superlu_enum_consts.h:40
DiagScale_t
Definition: superlu_enum_consts.h:35
IterRefine_t
Definition: superlu_enum_consts.h:36
rowperm_t
Definition: superlu_enum_consts.h:31
colperm_t
Definition: superlu_enum_consts.h:32
MemType
Definition: superlu_enum_consts.h:38
fact_t
Definition: superlu_enum_consts.h:30
norm_t
Definition: superlu_enum_consts.h:41
yes_no_t
Definition: superlu_enum_consts.h:29
PhaseType
Definition: superlu_enum_consts.h:66
#define MAX_3D_LEVEL
Definition: util_dist.h:77
int_t get_gpublas_nb()
Definition: util.c:826
int_t get_num_gpu_streams()
Definition: util.c:837
void gemm_division_cpu_gpu(superlu_dist_options_t *options, int *num_streams_used, int *stream_end_col, int *ncpu_blks, int nbrow, int ldu, int nstreams, int *full_u_cols, int num_blks, int_t gemmBufferSize)
Definition: util.c:1330
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definitions for parallel symbolic factorization routine.
Definition: superlu_defs.h:1142
Definition: superlu_defs.h:490
Definition: superlu_defs.h:435
Definition: superlu_ddefs.h:329
void * lookAhead_L_buff
Definition: superlu_defs.h:848
int nGPUStreams
Definition: superlu_defs.h:865
int offloadCondition
Definition: superlu_defs.h:863
void * bigU_host
Definition: superlu_defs.h:857
void * bigU_Phi
Definition: superlu_defs.h:856
int superlu_acc_offload
Definition: superlu_defs.h:864
Definition: psymbfact.h:106
Definition: psymbfact.h:57
Definition: superlu_defs.h:770
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: supermatrix.h:54
Definition: superlu_defs.h:760
Definition: superlu_defs.h:752
Definition: superlu_defs.h:924
Definition: superlu_defs.h:852
Definition: superlu_defs.h:780
Definition: superlu_defs.h:937
Definition: superlu_defs.h:839
int * factored
Definition: superlu_defs.h:915
Definition: superlu_defs.h:890
Definition: superlu_defs.h:398
Definition: superlu_defs.h:388
Definition: superlu_ddefs.h:317
void * lusup
Definition: superlu_defs.h:824
Definition: superlu_defs.h:815
Definition: superlu_defs.h:822
Definition: superlu_defs.h:947
Definition: superlu_ddefs.h:397
Definition: superlu_defs.h:833
statistics collected during parallel symbolic factorization
Definition: psymbfact.h:194
Definition: superlu_defs.h:551
Definition: superlu_defs.h:901
Definition: superlu_defs.h:744
Definition: superlu_defs.h:712
int superlu_acc_offload
Definition: superlu_defs.h:754
yes_no_t Use_TensorCore
Definition: superlu_defs.h:756
int superlu_maxsup
Definition: superlu_defs.h:748
int superlu_relax
Definition: superlu_defs.h:747
int superlu_num_gpu_streams
Definition: superlu_defs.h:753
int superlu_max_buffer_size
Definition: superlu_defs.h:752
int superlu_n_gemm
Definition: superlu_defs.h:751
Definition: superlu_defs.h:786
int val
Definition: superlu_defs.h:788
int ind
Definition: superlu_defs.h:787
Definition: superlu_defs.h:381
Definition: superlu_defs.h:868
Definition: superlu_defs.h:882
Definition: superlu_ddefs.h:308
Local information on vertices distribution.
Definition: psymbfact.h:140
Definition: superlu_defs.h:953
int_t trsMsgRecvZ
Definition: superlu_defs.h:974
double t_forwardSolve
Definition: superlu_defs.h:960
double tbs_comm
Definition: superlu_defs.h:965
double t_pdReDistribute_X_to_B
Definition: superlu_defs.h:958
int_t trsMsgRecvXY
Definition: superlu_defs.h:973
double tfs_comm
Definition: superlu_defs.h:962
int_t trsMsgSentZ
Definition: superlu_defs.h:972
double trsDataSendZ
Definition: superlu_defs.h:955
double trsDataRecvZ
Definition: superlu_defs.h:957
double trsDataSendXY
Definition: superlu_defs.h:954
double t_pdReDistribute_B_to_X
Definition: superlu_defs.h:959
double ppXmem
Definition: superlu_defs.h:976
double tbs_compute
Definition: superlu_defs.h:964
double tfs_tree[2 *MAX_3D_LEVEL]
Definition: superlu_defs.h:967
double tbs_tree[2 *MAX_3D_LEVEL]
Definition: superlu_defs.h:966
int_t trsMsgSentXY
Definition: superlu_defs.h:971
double t_backwardSolve
Definition: superlu_defs.h:963
double trsDataRecvXY
Definition: superlu_defs.h:956
double tfs_compute
Definition: superlu_defs.h:961
void PStatClear(SuperLUStat_t *)
Definition: util.c:336
static const int RD_U
Definition: superlu_defs.h:237
#define SuperLU_timer_
Definition: superlu_defs.h:282
int check_perm_dist(char *what, int_t n, int_t *perm)
Definition: sp_colorder.c:224
static const int BC_U
Definition: superlu_defs.h:236
void superlu_gridmap3d(MPI_Comm, int, int, int, int[], gridinfo3d_t *)
All processes in the MPI communicator must call this routine. On output, if a process is not in the S...
Definition: superlu_grid3d.c:66
static const int BC_L
Definition: superlu_defs.h:234
static const int RD_L
Definition: superlu_defs.h:235
enum constants header file