28 #define MAX_NGPU_STREAMS 32
31 void check(gpuError_t result, char const *const func, const char *const file, int const line)
35 fprintf(stderr, "GPU error at file %s: line %d code=(%s) \"%s\" \n",
36 file, line, gpuGetErrorString(result), func);
43 #define checkGPUErrors(val) check ( (val), #val, __FILE__, __LINE__ )
110 double ScatterMOPCounter;
111 double ScatterMOPTimer;
112 double GemmFLOPCounter;
113 double GemmFLOPTimer;
117 double tHost_PCIeH2D;
118 double tHost_PCIeD2H;
122 gpuEvent_t *GemmStart, *GemmEnd, *ScatterEnd;
123 gpuEvent_t *ePCIeH2D;
124 gpuEvent_t *ePCIeD2H_Start;
125 gpuEvent_t *ePCIeD2H_End;
163 int_t *gIperm_c_supno,
170 double thresh, SCT_t *SCT, int tag_ub,
207 int_t Remain_lbuf_send_size,
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:451
Definition: superlu_defs.h:854
Definition: superlu_defs.h:799
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: superlu_defs.h:789
Definition: superlu_defs.h:1012
Definition: superlu_defs.h:940
Definition: dcomplex.h:30
Definition: superlu_defs.h:1025
Definition: superlu_defs.h:927
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:903
Definition: superlu_defs.h:910
Definition: superlu_defs.h:1034
Definition: superlu_defs.h:844
Definition: superlu_defs.h:989
Definition: superlu_defs.h:728
Definition: superlu_zdefs.h:310
Definition: zlustruct_gpu.h:74
int_t * local_u_blk_infoPtr
Definition: zlustruct_gpu.h:97
int_t * xsup
Definition: zlustruct_gpu.h:106
int_t * perm_c_supno
Definition: zlustruct_gpu.h:129
int_t * LrowindVec
Definition: zlustruct_gpu.h:75
local_u_blk_info_t * local_u_blk_infoVec
Definition: zlustruct_gpu.h:95
int_t * UrowindPtr
Definition: zlustruct_gpu.h:83
doublecomplex * acc_L_buff
Definition: zlustruct_gpu.h:101
int_t * UrowindPtr_host
Definition: zlustruct_gpu.h:84
doublecomplex * UnzvalVec
Definition: zlustruct_gpu.h:87
int_t * jib_lookupVec
Definition: zlustruct_gpu.h:93
int_t * local_l_blk_infoPtr
Definition: zlustruct_gpu.h:92
int_t buffer_size
Definition: zlustruct_gpu.h:104
int_t * xsup_host
Definition: zlustruct_gpu.h:128
local_l_blk_info_t * local_l_blk_infoVec
Definition: zlustruct_gpu.h:91
int_t * LnzvalPtr_host
Definition: zlustruct_gpu.h:80
int_t nsupers
Definition: zlustruct_gpu.h:105
doublecomplex * LnzvalVec
Definition: zlustruct_gpu.h:78
int_t first_l_block_gpu
Definition: zlustruct_gpu.h:130
int_t * UrowindVec
Definition: zlustruct_gpu.h:82
int_t * jib_lookupPtr
Definition: zlustruct_gpu.h:94
int_t * LrowindPtr
Definition: zlustruct_gpu.h:76
int_t * UnzvalPtr_host
Definition: zlustruct_gpu.h:85
int_t * LnzvalPtr
Definition: zlustruct_gpu.h:79
int_t * UnzvalPtr
Definition: zlustruct_gpu.h:88
Definition: superlu_zdefs.h:340
Definition: superlu_zdefs.h:97
Definition: zlustruct_gpu.h:46
Ublock_info_t * Ublock_info
Definition: zlustruct_gpu.h:62
doublecomplex * bigV
Definition: zlustruct_gpu.h:48
Remain_info_t * Remain_info
Definition: zlustruct_gpu.h:63
doublecomplex * bigU_host
Definition: zlustruct_gpu.h:50
int_t * indirect2
Definition: zlustruct_gpu.h:52
int_t * usub_IndirectJ3_host
Definition: zlustruct_gpu.h:68
doublecomplex * Remain_L_buff
Definition: zlustruct_gpu.h:54
doublecomplex * Remain_L_buff_host
Definition: zlustruct_gpu.h:55
int_t * usub
Definition: zlustruct_gpu.h:58
doublecomplex * bigU
Definition: zlustruct_gpu.h:49
int_t * lsub_buf
Definition: zlustruct_gpu.h:60
int_t * lsub
Definition: zlustruct_gpu.h:57
int_t * usub_IndirectJ3
Definition: zlustruct_gpu.h:67
int_t * indirect
Definition: zlustruct_gpu.h:51
Remain_info_t * Remain_info_host
Definition: zlustruct_gpu.h:65
Ublock_info_t * Ublock_info_host
Definition: zlustruct_gpu.h:64
Definition: superlu_zdefs.h:467
Definition: superlu_zdefs.h:461
Definition: zlustruct_gpu.h:134
int * isNodeInMyGrid
Definition: zlustruct_gpu.h:141
gpuStream_t CopyStream
Definition: zlustruct_gpu.h:137
int nGPUStreams
Definition: zlustruct_gpu.h:140
double acc_async_cost
Definition: zlustruct_gpu.h:142
zLUstruct_gpu_t * A_gpu
Definition: zlustruct_gpu.h:136
int64_t int_t
Definition: superlu_defs.h:119
Distributed SuperLU data types and function prototypes.
static void check(gpuError_t result, char const *const func, const char *const file, int const line)
Definition: zlustruct_gpu.h:31
int zinitD2Hreduce(int next_k, d2Hreduce_t *d2Hred, int last_flag, HyP_t *HyP, zsluGPU_t *sluGPU, gridinfo_t *grid, zLUstruct_t *LUstruct, SCT_t *SCT)
int zfree_LUstruct_gpu(zsluGPU_t *sluGPU, SuperLUStat_t *)
int zwaitGPUscu(int streamId, zsluGPU_t *sluGPU, SCT_t *SCT)
int zreduceGPUlu(int last_flag, d2Hreduce_t *d2Hred, zsluGPU_t *sluGPU, SCT_t *SCT, gridinfo_t *grid, zLUstruct_t *LUstruct)
int zSchurCompUpdate_GPU(int_t streamId, int_t jj_cpu, int_t nub, int_t klst, int_t knsupc, int_t Rnbrow, int_t RemainBlk, int_t Remain_lbuf_send_size, int_t bigu_send_size, int_t ldu, int_t mcb, int_t buffer_size, int_t lsub_len, int_t usub_len, int_t ldt, int_t k0, zsluGPU_t *sluGPU, gridinfo_t *grid, SuperLUStat_t *)
int zreduceAllAncestors3d_GPU(int_t ilvl, int_t *myNodeCount, int_t **treePerm, zLUValSubBuf_t *LUvsb, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, zsluGPU_t *sluGPU, d2Hreduce_t *d2Hred, factStat_t *factStat, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *)
int zinitSluGPU3D_t(zsluGPU_t *sluGPU, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int_t *perm_c_supno, int_t n, int_t buffer_size, int_t bigu_size, int_t ldt, SuperLUStat_t *)
void zCopyLUToGPU3D(int *isNodeInMyGrid, zLocalLU_t *A_host, zsluGPU_t *sluGPU, Glu_persist_t *Glu_persist, int_t n, gridinfo3d_t *grid3d, int_t buffer_size, int_t bigu_size, int_t ldt, SuperLUStat_t *)
#define MAX_NGPU_STREAMS
Definition: zlustruct_gpu.h:28
int zsendLUpanelGPU2HOST(int_t k0, d2Hreduce_t *d2Hred, zsluGPU_t *sluGPU, SuperLUStat_t *)
int zsendSCUdataHost2GPU(int_t streamId, int_t *lsub, int_t *usub, doublecomplex *bigU, int_t bigu_send_size, int_t Remain_lbuf_send_size, zsluGPU_t *sluGPU, HyP_t *HyP)
void zsyncAllfunCallStreams(zsluGPU_t *sluGPU, SCT_t *SCT)
int zsparseTreeFactor_ASYNC_GPU(sForest_t *sforest, commRequests_t **comReqss, zscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, zLUValSubBuf_t **LUvsbs, zdiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int ldt, zsluGPU_t *sluGPU, d2Hreduce_t *d2Hred, HyP_t *HyP, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
void zPrint_matrix(char *desc, int_t m, int_t n, doublecomplex *dA, int_t lda)