16 #define GLOBAL_BLOCK_NOT_FOUND -1
134 lpanelGPU_t copyToGPU();
135 lpanelGPU_t copyToGPU(
void *basePtr);
139 int_t panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream,
145 double *UBlk,
int_t LDU,
146 double *DiagLBlk,
int_t LDD,
147 double thresh,
int_t *xsup,
151 cusolverDnHandle_t cusolverH, cudaStream_t cuStream,
152 double *dWork,
int* dInfo,
153 double *dDiagBuf,
int_t LDD,
154 double thresh,
int_t *xsup,
158 double *blkPtrGPU(
int k)
178 upanelGPU_t gpuPanel;
205 if (
index == NULL)
return 0;
274 std::cout <<
"## Warning: Empty Panel"
280 if (
nzcols() != alternateNzcols)
282 printf(
"Error 175\n");
303 upanelGPU_t copyToGPU();
305 upanelGPU_t copyToGPU(
void *basePtr);
308 int_t panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream,
312 double *blkPtrGPU(
int k)
434 double thresh_,
int *info_);
486 for (
int stream = 0; stream <
A_gpu.numCudaStreams; stream++)
488 cusolverDnDestroy(
A_gpu.cuSolveHandles[stream]);
489 cublasDestroy(
A_gpu.cuHandles[stream]);
490 cublasDestroy(
A_gpu.lookAheadLHandle[stream]);
491 cublasDestroy(
A_gpu.lookAheadUHandle[stream]);
512 double *V,
int_t ldv,
576 int_t setLUstruct_GPU();
577 int_t dsparseTreeFactorGPU(
582 int_t dsparseTreeFactorGPUBaseline(
588 int_t dAncestorFactorBaselineGPU(
595 int_t dSchurComplementUpdateGPU(
598 int_t dSchurCompUpdatePartGPU(
601 cublasHandle_t handle, cudaStream_t cuStream,
603 int_t lookAheadUpdateGPU(
606 int_t dSchurCompUpLimitedMem(
611 int_t dSchurCompUpdateExcludeOneGPU(
622 int_t zRecvLPanelGPU(
int_t k0,
int_t senderGrid,
double alpha,
double beta);
624 int_t zRecvUPanelGPU(
int_t k0,
int_t senderGrid,
double alpha,
double beta);
625 int_t copyLUGPUtoHost();
626 int_t copyLUHosttoGPU();
632 int_t SyncLookAheadUpdate(
int streamId);
634 double *gpuLvalBasePtr, *gpuUvalBasePtr;
635 int_t *gpuLidxBasePtr, *gpuUidxBasePtr;
636 size_t gpuLvalSize, gpuUvalSize, gpuLidxSize, gpuUidxSize;
638 lpanelGPU_t* copyLpanelsToGPU();
639 upanelGPU_t* copyUpanelsToGPU();
Definition: lupanels.hpp:19
int_t isEmpty()
Definition: lupanels.hpp:104
lpanel_t()
Definition: lupanels.hpp:40
int_t gid(int_t k)
Definition: lupanels.hpp:60
int_t LDA()
Definition: lupanels.hpp:97
int_t nbrow(int_t k)
Definition: lupanels.hpp:66
int_t * rowList(int_t k)
Definition: lupanels.hpp:77
size_t blkPtrOffset(int_t k)
Definition: lupanels.hpp:92
int_t nblocks()
Definition: lupanels.hpp:50
int_t haveDiag()
Definition: lupanels.hpp:56
int_t nzrows()
Definition: lupanels.hpp:55
int_t panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD)
Definition: l_panels.cpp:60
int_t nzvalSize()
Definition: lupanels.hpp:105
int_t find(int_t k)
Definition: l_panels.cpp:49
int_t packDiagBlock(double *DiagLBlk, int_t LDD)
Definition: l_panels.cpp:88
size_t totalSize()
Definition: lupanels.hpp:119
double * val
Definition: lupanels.hpp:22
int getEndBlock(int iSt, int maxRows)
Definition: l_panels.cpp:100
int_t indexSize()
Definition: lupanels.hpp:112
double * blkPtr(int_t k)
Definition: lupanels.hpp:87
int_t * index
Definition: lupanels.hpp:21
int_t stRow(int k)
Definition: lupanels.hpp:72
int_t diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
Definition: l_panels.cpp:78
lpanel_t(int_t *index_, double *val_)
Definition: lupanels.hpp:47
int_t ncols()
Definition: lupanels.hpp:57
Definition: lupanels.hpp:172
int_t LDA()
Definition: lupanels.hpp:208
int_t nbcol(int_t k)
Definition: lupanels.hpp:217
int_t * index
Definition: lupanels.hpp:174
upanel_t(int_t *index_, double *val_)
Definition: lupanels.hpp:197
double * blkPtr(int_t k)
Definition: lupanels.hpp:232
int_t nblocks()
Definition: lupanels.hpp:199
int_t indexSize()
Definition: lupanels.hpp:260
int_t packed2skyline(int_t k, int_t *usub, double *uval, int_t *xsup)
Definition: u_panels.cpp:74
int_t stCol(int k)
Definition: lupanels.hpp:289
int_t nzvalSize()
Definition: lupanels.hpp:253
size_t totalSize()
Definition: lupanels.hpp:266
upanel_t()
Definition: lupanels.hpp:190
int_t nzcols()
Definition: lupanels.hpp:204
int_t isEmpty()
Definition: lupanels.hpp:252
int_t gid(int_t k)
Definition: lupanels.hpp:211
int_t find(int_t k)
Definition: u_panels.cpp:110
int_t diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
int_t * colList(int_t k)
Definition: lupanels.hpp:222
int getEndBlock(int jSt, int maxCols)
Definition: u_panels.cpp:131
int_t panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD)
Definition: u_panels.cpp:121
int_t checkCorrectness()
Definition: lupanels.hpp:270
size_t blkPtrOffset(int_t k)
Definition: lupanels.hpp:237
double * val
Definition: lupanels.hpp:175
#define UPANEL_HEADER_SIZE
Definition: lu_common.hpp:7
#define LPANEL_HEADER_SIZE
Definition: lu_common.hpp:6
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: lupanels.hpp:330
std::vector< bcastStruct > bcastLval
Definition: lupanels.hpp:390
int numDiagBufs
Definition: lupanels.hpp:346
int_t supersize(int_t k)
Definition: lupanels.hpp:398
int_t myrow
Definition: lupanels.hpp:335
std::vector< int_t > UidxSendCounts
Definition: lupanels.hpp:385
std::vector< bcastStruct > bcastLidx
Definition: lupanels.hpp:392
void dFactBatchSolve(int k_st, int k_end, int_t *perm_c_supno)
int_t packedU2skyline(dLUstruct_t *LUstruct)
Definition: lupanels.cpp:379
void marshallBatchedBufferCopyData(int k_st, int k_end, int_t *perm_c_supno)
int_t * indirect
Definition: lupanels.hpp:340
void marshallBatchedTRSMUData(int k_st, int k_end, int_t *perm_c_supno)
std::vector< int_t * > UidxRecvBufs
Definition: lupanels.hpp:379
int marshallSCUBatchedDataOuter(int k_st, int k_end, int_t *perm_c_supno)
int * isNodeInMyGrid
Definition: lupanels.hpp:342
int_t dSchurCompUpdateExcludeOne(int_t k, int_t ex, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:476
int_t zRecvUPanel(int_t k0, int_t senderGrid, double alpha, double beta)
Definition: lupanels_comm3d.cpp:114
int_t kcol(int_t k)
Definition: lupanels.hpp:396
LUstructGPU_t * dA_gpu
Definition: lupanels.hpp:405
int64_t total_start_size
Definition: lupanels.hpp:417
int nThreads
Definition: lupanels.hpp:339
int_t dsparseTreeFactorBaseline(sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked.cpp:216
int_t dDiagFactorPanelSolve(int_t k, int_t offset, ddiagFactBufs_t **dFBufs)
Definition: lupanels.cpp:505
void marshallBatchedTRSMLData(int k_st, int k_end, int_t *perm_c_supno)
int maxLeafNodes
Definition: lupanels.hpp:357
int ** d_lblock_gid_ptrs
Definition: lupanels.hpp:414
void initSCUMarshallData(int k_st, int k_end, int_t *perm_c_supno)
SCT_t * SCT
Definition: lupanels.hpp:349
SuperLUStat_t * stat
Definition: lupanels.hpp:351
std::vector< double * > UvalRecvBufs
Definition: lupanels.hpp:377
int_t ldt
Definition: lupanels.hpp:335
int * d_lblock_start_dat
Definition: lupanels.hpp:415
int_t ancestorReduction3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm)
Definition: lupanels_comm3d.cpp:6
std::vector< bcastStruct > bcastDiagRow
Definition: lupanels.hpp:388
upanel_t * uPanelVec
Definition: lupanels.hpp:332
int_t zSendUPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d.cpp:97
std::vector< bcastStruct > bcastDiagCol
Definition: lupanels.hpp:389
ddiagFactBufs_t ** dFBufs
Definition: lupanels.hpp:359
int64_t total_l_blocks
Definition: lupanels.hpp:417
std::vector< int_t * > LidxRecvBufs
Definition: lupanels.hpp:378
int_t * indirectRow
Definition: lupanels.hpp:340
std::vector< int_t > UvalSendCounts
Definition: lupanels.hpp:383
LUstruct_v100(int_t nsupers, int_t ldt_, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT_, superlu_dist_options_t *options_, SuperLUStat_t *stat, double thresh_, int *info_)
Definition: lupanels.cpp:44
int superlu_acc_offload
Definition: lupanels.hpp:360
int ** d_lblock_start_ptrs
Definition: lupanels.hpp:415
double * bigV
Definition: lupanels.hpp:341
int dsparseTreeFactorBatchGPU(sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
int_t krow(int_t k)
Definition: lupanels.hpp:395
int_t zSendLPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d.cpp:59
int_t blockUpdate(int_t k, int_t ii, int_t jj, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:446
int marshallSCUBatchedDataInner(int k_st, int k_end, int_t *perm_c_supno)
superlu_dist_options_t * options
Definition: lupanels.hpp:350
int_t pdgstrf3d()
Definition: pdgstrf3d_upacked.cpp:211
std::vector< double * > LvalRecvBufs
Definition: lupanels.hpp:376
int_t procIJ(int_t i, int_t j)
Definition: lupanels.hpp:397
int_t dsparseTreeFactor(sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked.cpp:5
int_t * xsup
Definition: lupanels.hpp:336
anc25d_t anc25d
Definition: lupanels.hpp:402
int_t maxLvalCount
Definition: lupanels.hpp:369
int_t maxUidxCount
Definition: lupanels.hpp:372
int_t dPanelBcast(int_t k, int_t offset)
Definition: lupanels.cpp:535
indirectMapType
Definition: lupanels.hpp:423
@ ROW_MAP
Definition: lupanels.hpp:424
@ COL_MAP
Definition: lupanels.hpp:425
std::vector< int_t > LvalSendCounts
Definition: lupanels.hpp:382
int_t g2lCol(int_t k)
Definition: lupanels.hpp:400
double thresh
Definition: lupanels.hpp:343
dLocalLU_t * host_Llu
Definition: lupanels.hpp:411
~LUstruct_v100()
Definition: lupanels.hpp:436
int_t * indirectCol
Definition: lupanels.hpp:340
dLocalLU_t d_localLU
Definition: lupanels.hpp:412
int_t maxUvalCount
Definition: lupanels.hpp:371
void marshallBatchedSCUData(int k_st, int k_end, int_t *perm_c_supno)
void marshallBatchedLUData(int k_st, int k_end, int_t *perm_c_supno)
lpanel_t * lPanelVec
Definition: lupanels.hpp:331
int_t mycol
Definition: lupanels.hpp:335
std::vector< double * > diagFactBufs
Definition: lupanels.hpp:373
LUstructGPU_t A_gpu
Definition: lupanels.hpp:406
int_t lookAheadUpdate(int_t k, int_t laIdx, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:407
int64_t * d_lblock_start_offsets
Definition: lupanels.hpp:416
int * info
Definition: lupanels.hpp:344
int_t maxLvl
Definition: lupanels.hpp:356
std::vector< bcastStruct > bcastUval
Definition: lupanels.hpp:391
std::vector< int_t > LidxSendCounts
Definition: lupanels.hpp:384
int_t * computeIndirectMap(indirectMapType direction, int_t srcLen, int_t *srcVec, int_t dstLen, int_t *dstVec)
Definition: lupanels.cpp:292
int_t nsupers
Definition: lupanels.hpp:337
int_t Pr
Definition: lupanels.hpp:335
std::vector< bcastStruct > bcastUidx
Definition: lupanels.hpp:393
dtrf3Dpartition_t * trf3Dpartition
Definition: lupanels.hpp:355
int_t dAncestorFactorBaseline(int_t alvl, sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: anc25d.cpp:25
int_t maxLidxCount
Definition: lupanels.hpp:370
int_t g2lRow(int_t k)
Definition: lupanels.hpp:399
int64_t * d_lblock_gid_offsets
Definition: lupanels.hpp:416
int_t iam
Definition: lupanels.hpp:335
int_t dSchurComplementUpdate(int_t k, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:267
gridinfo3d_t * grid3d
Definition: lupanels.hpp:333
gridinfo_t * grid
Definition: lupanels.hpp:334
int_t zRecvLPanel(int_t k0, int_t senderGrid, double alpha, double beta)
Definition: lupanels_comm3d.cpp:76
int_t Pc
Definition: lupanels.hpp:335
int_t dScatter(int_t m, int_t n, int_t gi, int_t gj, double *V, int_t ldv, int_t *srcRowList, int_t *srcColList)
Definition: lupanels.cpp:323
int * d_lblock_gid_dat
Definition: lupanels.hpp:414
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: anc25d.hpp:13
Definition: superlu_ddefs.h:340
Definition: superlu_ddefs.h:97
Definition: superlu_ddefs.h:467
Definition: superlu_ddefs.h:318
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:989
Definition: superlu_defs.h:728
int num_lookaheads
Definition: superlu_defs.h:757
Distributed SuperLU data types and function prototypes.
int dfreeDiagFactBufsArr(int mxLeafNode, ddiagFactBufs_t **dFBufs)
#define CEILING(a, b)
Definition: superlu_defs.h:277
int sp_ienv_dist(int, superlu_dist_options_t *)
Definition: sp_ienv.c:80
int64_t int_t
Definition: superlu_defs.h:119
#define PNUM(i, j, grid)
Definition: superlu_defs.h:276
int j
Definition: sutil_dist.c:287
int i
Definition: sutil_dist.c:287
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54