21#include "gpu_api_utils.h"
/* Compile-time bound used to size the per-stream arrays declared in this
 * header (scubufs, funCallStreams, gpublasHandles, lastOffloadStream). */
29#define MAX_NGPU_STREAMS 32
32void check(gpuError_t result,
char const *
const func,
const char *
const file,
int const line)
36 fprintf(stderr,
"GPU error at file %s: line %d code=(%s) \"%s\" \n",
37 file, line, gpuGetErrorString(result), func);
/* Convenience wrapper around check(): forwards the value of `val`, the
 * stringified text of the expression (#val), and the __FILE__ / __LINE__
 * of the place where the macro is expanded. */
44#define checkGPUErrors(val) check ( (val), #val, __FILE__, __LINE__ )
55 double *Remain_L_buff;
56 double *Remain_L_buff_host;
61 int_t *lsub_buf, *usub_buf;
68 int_t* usub_IndirectJ3;
69 int_t* usub_IndirectJ3_host;
81 int_t *LnzvalPtr_host;
85 int_t *UrowindPtr_host;
86 int_t *UnzvalPtr_host;
93 int_t *local_l_blk_infoPtr;
98 int_t *local_u_blk_infoPtr;
101 dSCUbuf_gpu_t scubufs[MAX_NGPU_STREAMS];
102 double *acc_L_buff, *acc_U_buff;
111 double ScatterMOPCounter;
112 double ScatterMOPTimer;
113 double GemmFLOPCounter;
114 double GemmFLOPTimer;
118 double tHost_PCIeH2D;
119 double tHost_PCIeD2H;
123 gpuEvent_t *GemmStart, *GemmEnd, *ScatterEnd;
124 gpuEvent_t *ePCIeH2D;
125 gpuEvent_t *ePCIeD2H_Start;
126 gpuEvent_t *ePCIeD2H_End;
131 int_t first_l_block_gpu, first_u_block_gpu;
137 dLUstruct_gpu_t *A_gpu, *dA_gpu;
138 gpuStream_t funCallStreams[MAX_NGPU_STREAMS], CopyStream;
139 gpublasHandle_t gpublasHandles[MAX_NGPU_STREAMS];
140 int lastOffloadStream[MAX_NGPU_STREAMS];
164 int_t *gIperm_c_supno,
171 double thresh,
SCT_t *SCT,
int tag_ub,
185extern int dreduceGPUlu(
int last_flag,
d2Hreduce_t* d2Hred,
/* Declaration only; the definition is not part of this listing.
 * Takes a stream index, a dsluGPU_t pointer and an SCT_t pointer, and
 * returns int.
 * NOTE(review): the name suggests it waits for the GPU Schur-complement
 * update issued on stream `streamId` -- confirm against the definition,
 * including the meaning of the return value. */
189extern int dwaitGPUscu(
int streamId, dsluGPU_t *sluGPU,
SCT_t *SCT);
192extern int dsendSCUdataHost2GPU(
194 int_t Remain_lbuf_send_size, dsluGPU_t *sluGPU,
HyP_t* HyP
197extern int dinitSluGPU3D_t(
204int dSchurCompUpdate_GPU(
208 int_t Remain_lbuf_send_size,
218extern void dCopyLUToGPU3D (
int* isNodeInMyGrid,
dLocalLU_t *A_host,
224extern int dreduceAllAncestors3d_GPU(
int_t ilvl,
int_t* myNodeCount,
/* Declaration only; the definition is not part of this listing.
 * Takes a dsluGPU_t pointer and an SCT_t pointer; returns nothing.
 * NOTE(review): presumably synchronizes the funCallStreams array declared
 * in this header -- confirm against the definition. */
232extern void dsyncAllfunCallStreams(dsluGPU_t* sluGPU,
SCT_t* SCT);
/* Declaration only; the definition is not part of this listing.
 * Takes a dsluGPU_t pointer and an unnamed SuperLUStat_t pointer; returns
 * int.
 * NOTE(review): presumably releases the GPU-side LU structures reachable
 * from `sluGPU`; the meaning of the int result is not visible here --
 * confirm against the definition. */
233extern int dfree_LUstruct_gpu (dsluGPU_t *sluGPU,
SuperLUStat_t *);
/* Declaration only; the definition is not part of this listing.
 * Takes a char* description, two int_t extents (m, n), a double* named dA,
 * and an int_t named lda; returns nothing.
 * NOTE(review): the parameter names follow the usual dense-matrix
 * convention (m rows, n columns, leading dimension lda), and `dA` may be
 * expected to be a device pointer -- confirm both against the definition. */
237extern void dPrint_matrix(
char *desc,
int_t m,
int_t n,
double *dA,
int_t lda );
int int_t
Definition: superlu_defs.h:114
int dsparseTreeFactor_ASYNC_GPU(sForest_t *sforest, commRequests_t **comReqss, scuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, dLUValSubBuf_t **LUvsbs, diagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, sluGPU_t *sluGPU, d2Hreduce_t *d2Hred, HyP_t *HyP, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
Definition: treeFactorizationGPU.c:44
double acc_async_cost
Definition: acc_aux.c:56
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:435
Definition: superlu_ddefs.h:329
Definition: superlu_defs.h:770
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: superlu_defs.h:760
Definition: superlu_defs.h:924
Definition: superlu_defs.h:852
Definition: superlu_ddefs.h:357
Definition: superlu_ddefs.h:254
Definition: superlu_ddefs.h:97
Definition: superlu_ddefs.h:391
Definition: superlu_ddefs.h:385
Definition: superlu_defs.h:937
Definition: superlu_defs.h:839
Definition: superlu_defs.h:890
Definition: superlu_defs.h:398
Definition: superlu_defs.h:388
Definition: superlu_defs.h:815
Definition: superlu_defs.h:822
Definition: superlu_defs.h:947
Definition: superlu_ddefs.h:397
Definition: superlu_defs.h:901
Definition: superlu_defs.h:712
Distributed SuperLU data types and function prototypes.