16#define GLOBAL_BLOCK_NOT_FOUND -1
20template <
typename Ftype>
29 xlpanelGPU_t<Ftype> gpuPanel;
137 xlpanelGPU_t<Ftype> copyToGPU();
138 xlpanelGPU_t<Ftype> copyToGPU(
void *basePtr);
142 int_t panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream,
148 Ftype *UBlk,
int_t LDU,
149 Ftype *DiagLBlk,
int_t LDD,
150 Ftype thresh,
int_t *xsup,
154 cusolverDnHandle_t cusolverH, cudaStream_t cuStream,
155 Ftype *dWork,
int *dInfo,
156 Ftype *dDiagBuf,
int_t LDD,
161 Ftype *blkPtrGPU(
int k)
174template <
typename Ftype>
182 xupanelGPU_t<Ftype> gpuPanel;
280 std::cout <<
"## Warning: Empty Panel"
286 if (
nzcols() != alternateNzcols)
288 printf(
"Error 175\n");
308 xupanelGPU_t<Ftype> copyToGPU();
310 xupanelGPU_t<Ftype> copyToGPU(
void *basePtr);
313 int_t panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream,
317 Ftype *blkPtrGPU(
int k)
333template <
typename Ftype>
392 #pragma warning disabling bcastStruct
394 std::vector<bcastStruct> bcastDiagRow;
395 std::vector<bcastStruct> bcastDiagCol;
396 std::vector<bcastStruct> bcastLval;
397 std::vector<bcastStruct> bcastUval;
398 std::vector<bcastStruct> bcastLidx;
399 std::vector<bcastStruct> bcastUidx;
497 for (
int stream = 0; stream <
A_gpu.numCudaStreams; stream++)
499 cusolverDnDestroy(
A_gpu.cuSolveHandles[stream]);
500 cublasDestroy(
A_gpu.cuHandles[stream]);
501 cublasDestroy(
A_gpu.lookAheadLHandle[stream]);
502 cublasDestroy(
A_gpu.lookAheadUHandle[stream]);
590 int_t setLUstruct_GPU();
591 int_t dsparseTreeFactorGPU(
596 int_t dsparseTreeFactorGPUBaseline(
602 int_t dAncestorFactorBaselineGPU(
609 int_t dSchurComplementUpdateGPU(
612 int_t dSchurCompUpdatePartGPU(
615 cublasHandle_t handle, cudaStream_t cuStream,
617 int_t lookAheadUpdateGPU(
620 int_t dSchurCompUpLimitedMem(
625 int_t dSchurCompUpdateExcludeOneGPU(
636 int_t zRecvLPanelGPU(
int_t k0,
int_t senderGrid, Ftype alpha, Ftype beta);
638 int_t zRecvUPanelGPU(
int_t k0,
int_t senderGrid, Ftype alpha, Ftype beta);
639 int_t copyLUGPUtoHost();
640 int_t copyLUHosttoGPU();
646 int_t SyncLookAheadUpdate(
int streamId);
648 Ftype *gpuLvalBasePtr, *gpuUvalBasePtr;
649 int_t *gpuLidxBasePtr, *gpuUidxBasePtr;
650 size_t gpuLvalSize, gpuUvalSize, gpuLidxSize, gpuUidxSize;
652 xlpanelGPU_t<Ftype> *copyLpanelsToGPU();
653 xupanelGPU_t<Ftype> *copyUpanelsToGPU();
Definition: xlupanels.hpp:22
int_t nzvalSize()
Definition: xlupanels.hpp:108
int_t find(int_t k)
Definition: l_panels_impl.hpp:52
int_t packDiagBlock(Ftype *DiagLBlk, int_t LDD)
Definition: l_panels_impl.hpp:101
int_t * rowList(int_t k)
Definition: xlupanels.hpp:80
int_t haveDiag()
Definition: xlupanels.hpp:59
int_t stRow(int k)
Definition: xlupanels.hpp:75
int_t gid(int_t k)
Definition: xlupanels.hpp:63
int_t diagFactor(int_t k, Ftype *UBlk, int_t LDU, threshPivValType< Ftype > thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
Definition: l_panels_impl.hpp:86
int_t nblocks()
Definition: xlupanels.hpp:53
size_t totalSize()
Definition: xlupanels.hpp:122
int_t indexSize()
Definition: xlupanels.hpp:115
Ftype * blkPtr(int_t k)
Definition: xlupanels.hpp:90
int_t * index
Definition: xlupanels.hpp:24
int_t nbrow(int_t k)
Definition: xlupanels.hpp:69
Ftype * val
Definition: xlupanels.hpp:25
int_t isEmpty()
Definition: xlupanels.hpp:107
int_t ncols()
Definition: xlupanels.hpp:60
xlpanel_t(int_t *index_, Ftype *val_)
Definition: xlupanels.hpp:50
int_t nzrows()
Definition: xlupanels.hpp:58
int getEndBlock(int iSt, int maxRows)
Definition: l_panels_impl.hpp:114
size_t blkPtrOffset(int_t k)
Definition: xlupanels.hpp:95
int_t LDA()
Definition: xlupanels.hpp:100
int_t panelSolve(int_t ksupsz, Ftype *DiagBlk, int_t LDD)
Definition: l_panels_impl.hpp:64
xlpanel_t()
Definition: xlupanels.hpp:43
Definition: xlupanels.hpp:176
int_t diagFactor(int_t k, Ftype *UBlk, int_t LDU, Ftype thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
int_t find(int_t k)
Definition: u_panels_impl.hpp:112
int_t indexSize()
Definition: xlupanels.hpp:266
int_t packed2skyline(int_t k, int_t *usub, Ftype *uval, int_t *xsup)
Definition: u_panels_impl.hpp:76
xupanel_t(int_t *index_, Ftype *val_)
Definition: xlupanels.hpp:201
int_t stCol(int k)
Definition: xlupanels.hpp:295
int_t * index
Definition: xlupanels.hpp:178
int_t checkCorrectness()
Definition: xlupanels.hpp:276
int_t isEmpty()
Definition: xlupanels.hpp:258
int_t nzcols()
Definition: xlupanels.hpp:208
int_t nzvalSize()
Definition: xlupanels.hpp:259
int_t LDA()
Definition: xlupanels.hpp:214
int_t nbcol(int_t k)
Definition: xlupanels.hpp:223
Ftype * blkPtr(int_t k)
Definition: xlupanels.hpp:238
Ftype * val
Definition: xlupanels.hpp:179
size_t totalSize()
Definition: xlupanels.hpp:272
int getEndBlock(int jSt, int maxCols)
Definition: u_panels_impl.hpp:134
xupanel_t()
Definition: xlupanels.hpp:194
int_t nblocks()
Definition: xlupanels.hpp:203
int_t gid(int_t k)
Definition: xlupanels.hpp:217
int_t * colList(int_t k)
Definition: xlupanels.hpp:228
int_t panelSolve(int_t ksupsz, Ftype *DiagBlk, int_t LDD)
Definition: u_panels_impl.hpp:124
size_t blkPtrOffset(int_t k)
Definition: xlupanels.hpp:243
typename std::conditional< std::is_same< Ftype, double >::value, dLUstruct_t, typename std::conditional< std::is_same< Ftype, float >::value, sLUstruct_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zLUstruct_t, void >::type >::type >::type LUStruct_type
Definition: luAuxStructTemplated.hpp:102
typename std::conditional< std::is_same< Ftype, double >::value, dtrf3Dpartition_t, typename std::conditional< std::is_same< Ftype, float >::value, strf3Dpartition_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, ztrf3Dpartition_t, void >::type >::type >::type trf3dpartitionType
Definition: luAuxStructTemplated.hpp:87
typename std::conditional< std::is_same< Ftype, float >::value, float, typename std::conditional< std::is_same< Ftype, double >::value||std::is_same< Ftype, doublecomplex >::value, double, float >::type >::type threshPivValType
Definition: luAuxStructTemplated.hpp:70
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
#define UPANEL_HEADER_SIZE
Definition: lu_common.hpp:7
#define LPANEL_HEADER_SIZE
Definition: lu_common.hpp:6
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: anc25d.hpp:13
Definition: superlu_ddefs.h:97
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:989
Definition: superlu_defs.h:728
int num_lookaheads
Definition: superlu_defs.h:757
Definition: xlupanels.hpp:335
int_t Pr
Definition: xlupanels.hpp:340
int_t dSchurCompUpdateExcludeOne(int_t k, int_t ex, xlpanel_t< Ftype > &lpanel, xupanel_t< Ftype > &upanel)
Definition: lupanels_impl.hpp:528
int * d_lblock_start_dat
Definition: xlupanels.hpp:421
xLUstructGPU_t< Ftype > A_gpu
Definition: xlupanels.hpp:412
dLocalLU_t d_localLU
Definition: xlupanels.hpp:418
diagFactBufs_type< Ftype > ** dFBufs
Definition: xlupanels.hpp:363
int_t g2lCol(int_t k)
Definition: xlupanels.hpp:407
int marshallSCUBatchedDataOuter(int k_st, int k_end, int_t *perm_c_supno)
dLocalLU_t * host_Llu
Definition: xlupanels.hpp:417
int_t maxUvalCount
Definition: xlupanels.hpp:375
int_t lookAheadUpdate(int_t k, int_t laIdx, xlpanel_t< Ftype > &lpanel, xupanel_t< Ftype > &upanel)
Definition: lupanels_impl.hpp:457
std::vector< int_t * > LidxRecvBufs
Definition: xlupanels.hpp:382
int_t dDiagFactorPanelSolve(int_t k, int_t offset, diagFactBufs_type< Ftype > **dFBufs)
Definition: lupanels_impl.hpp:558
int_t mycol
Definition: xlupanels.hpp:340
SuperLUStat_t * stat
Definition: xlupanels.hpp:356
int numDiagBufs
Definition: xlupanels.hpp:351
gridinfo_t * grid
Definition: xlupanels.hpp:339
~xLUstruct_t()
Definition: xlupanels.hpp:442
void marshallBatchedTRSMUData(int k_st, int k_end, int_t *perm_c_supno)
xlpanel_t< Ftype > * lPanelVec
Definition: xlupanels.hpp:336
int marshallSCUBatchedDataInner(int k_st, int k_end, int_t *perm_c_supno)
void marshallBatchedTRSMLData(int k_st, int k_end, int_t *perm_c_supno)
int_t blockUpdate(int_t k, int_t ii, int_t jj, xlpanel_t< Ftype > &lpanel, xupanel_t< Ftype > &upanel)
Definition: lupanels_impl.hpp:497
int_t * indirectCol
Definition: xlupanels.hpp:345
int_t myrow
Definition: xlupanels.hpp:340
int_t procIJ(int_t i, int_t j)
Definition: xlupanels.hpp:404
void dFactBatchSolve(int k_st, int k_end, int_t *perm_c_supno)
int ** d_lblock_gid_ptrs
Definition: xlupanels.hpp:420
int_t packedU2skyline(LUStruct_type< Ftype > *LUstruct)
Definition: lupanels_impl.hpp:427
int nThreads
Definition: xlupanels.hpp:344
int_t maxLvl
Definition: xlupanels.hpp:360
int_t g2lRow(int_t k)
Definition: xlupanels.hpp:406
superlu_dist_options_t * options
Definition: xlupanels.hpp:355
std::vector< int_t > UvalSendCounts
Definition: xlupanels.hpp:387
int_t * indirect
Definition: xlupanels.hpp:345
Ftype * bigV
Definition: xlupanels.hpp:346
int_t kcol(int_t k)
Definition: xlupanels.hpp:403
int_t maxUidxCount
Definition: xlupanels.hpp:376
xLUstruct_t(int_t nsupers, int_t ldt_, trf3dpartitionType< Ftype > *trf3Dpartition, LUStruct_type< Ftype > *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT_, superlu_dist_options_t *options_, SuperLUStat_t *stat, threshPivValType< Ftype > thresh_, int *info_)
Definition: lupanels_impl.hpp:83
threshPivValType< Ftype > thresh
Definition: xlupanels.hpp:348
std::vector< int_t * > UidxRecvBufs
Definition: xlupanels.hpp:383
int_t dsparseTreeFactor(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:8
std::vector< Ftype * > LvalRecvBufs
Definition: xlupanels.hpp:380
int64_t total_start_size
Definition: xlupanels.hpp:423
int_t pdgstrf3d()
Definition: pdgstrf3d_upacked_impl.hpp:224
int_t maxLidxCount
Definition: xlupanels.hpp:374
int64_t * d_lblock_gid_offsets
Definition: xlupanels.hpp:422
int_t dAncestorFactorBaseline(int_t alvl, sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: anc25d_impl.hpp:29
void initSCUMarshallData(int k_st, int k_end, int_t *perm_c_supno)
diagFactBufs_type< Ftype > ** initDiagFactBufsArr(int_t mxLeafNode, int_t ldt)
Definition: lupanels_impl.hpp:16
void marshallBatchedBufferCopyData(int k_st, int k_end, int_t *perm_c_supno)
int_t nsupers
Definition: xlupanels.hpp:342
void marshallBatchedLUData(int k_st, int k_end, int_t *perm_c_supno)
int_t ldt
Definition: xlupanels.hpp:340
int ** d_lblock_start_ptrs
Definition: xlupanels.hpp:421
int_t maxLvalCount
Definition: xlupanels.hpp:373
int_t dScatter(int_t m, int_t n, int_t gi, int_t gj, Ftype *V, int_t ldv, int_t *srcRowList, int_t *srcColList)
Definition: lupanels_impl.hpp:372
SCT_t * SCT
Definition: xlupanels.hpp:354
int_t zSendLPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d_impl.hpp:62
int64_t total_l_blocks
Definition: xlupanels.hpp:423
trf3dpartitionType< Ftype > * trf3Dpartition
Definition: xlupanels.hpp:359
indirectMapType
Definition: xlupanels.hpp:429
@ ROW_MAP
Definition: xlupanels.hpp:430
@ COL_MAP
Definition: xlupanels.hpp:431
int_t zSendUPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d_impl.hpp:100
std::vector< int_t > LidxSendCounts
Definition: xlupanels.hpp:388
int superlu_acc_offload
Definition: xlupanels.hpp:364
int * isNodeInMyGrid
Definition: xlupanels.hpp:347
int dsparseTreeFactorBatchGPU(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
int_t zRecvUPanel(int_t k0, int_t senderGrid, Ftype alpha, Ftype beta)
Definition: lupanels_comm3d_impl.hpp:117
std::vector< int_t > UidxSendCounts
Definition: xlupanels.hpp:389
std::vector< Ftype * > UvalRecvBufs
Definition: xlupanels.hpp:381
int_t ancestorReduction3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm)
Definition: lupanels_comm3d_impl.hpp:9
int_t supersize(int_t k)
Definition: xlupanels.hpp:405
int * d_lblock_gid_dat
Definition: xlupanels.hpp:420
void marshallBatchedSCUData(int k_st, int k_end, int_t *perm_c_supno)
int maxLeafNodes
Definition: xlupanels.hpp:361
xupanel_t< Ftype > * uPanelVec
Definition: xlupanels.hpp:337
int_t dPanelBcast(int_t k, int_t offset)
Definition: lupanels_impl.hpp:589
anc25d_t anc25d
Definition: xlupanels.hpp:409
int_t dAncestorFactor(int_t alvl, sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dAncestorFactor_impl.hpp:6
int_t zRecvLPanel(int_t k0, int_t senderGrid, Ftype alpha, Ftype beta)
Definition: lupanels_comm3d_impl.hpp:79
int_t * computeIndirectMap(indirectMapType direction, int_t srcLen, int_t *srcVec, int_t dstLen, int_t *dstVec)
Definition: lupanels_impl.hpp:340
int_t krow(int_t k)
Definition: xlupanels.hpp:402
int64_t * d_lblock_start_offsets
Definition: xlupanels.hpp:422
int_t dSchurComplementUpdate(int_t k, xlpanel_t< Ftype > &lpanel, xupanel_t< Ftype > &upanel)
Definition: lupanels_impl.hpp:314
std::vector< int_t > LvalSendCounts
Definition: xlupanels.hpp:386
int_t * indirectRow
Definition: xlupanels.hpp:345
int_t * xsup
Definition: xlupanels.hpp:341
std::vector< Ftype * > diagFactBufs
Definition: xlupanels.hpp:377
int_t dsparseTreeFactorBaseline(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:220
int_t iam
Definition: xlupanels.hpp:340
gridinfo3d_t * grid3d
Definition: xlupanels.hpp:338
xLUstructGPU_t< Ftype > * dA_gpu
Definition: xlupanels.hpp:411
int_t Pc
Definition: xlupanels.hpp:340
int * info
Definition: xlupanels.hpp:349
Distributed SuperLU data types and function prototypes.
#define CEILING(a, b)
Definition: superlu_defs.h:277
int sp_ienv_dist(int, superlu_dist_options_t *)
Definition: sp_ienv.c:80
int64_t int_t
Definition: superlu_defs.h:119
#define PNUM(i, j, grid)
Definition: superlu_defs.h:276
int j
Definition: sutil_dist.c:287
int i
Definition: sutil_dist.c:287
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54