20#include "gpu_api_utils.h"
/**
 * Number of slots in the per-stream arrays declared in this file
 * (scubufs, funCallStreams, gpublasHandles, lastOffloadStream).
 */
#define MAX_NGPU_STREAMS 32
31void check(gpuError_t result,
char const *
const func,
const char *
const file,
int const line)
35 fprintf(stderr,
"GPU error at file %s: line %d code=(%s) \"%s\" \n",
36 file, line, gpuGetErrorString(result), func);
/**
 * Forward the result of a GPU API call to check(), together with the
 * stringified source text of the call and the file/line of the call site.
 *
 * `val` appears twice in the expansion, but the second occurrence is the
 * operand of the `#` stringification operator and is not evaluated, so the
 * wrapped expression runs exactly once.
 */
#define checkGPUErrors(val) check((val), #val, __FILE__, __LINE__)
60 int_t *lsub_buf, *usub_buf;
67 int_t* usub_IndirectJ3;
68 int_t* usub_IndirectJ3_host;
80 int_t *LnzvalPtr_host;
84 int_t *UrowindPtr_host;
85 int_t *UnzvalPtr_host;
92 int_t *local_l_blk_infoPtr;
97 int_t *local_u_blk_infoPtr;
102 zSCUbuf_gpu_t scubufs[MAX_NGPU_STREAMS];
111 double ScatterMOPCounter;
112 double ScatterMOPTimer;
113 double GemmFLOPCounter;
114 double GemmFLOPTimer;
118 double tHost_PCIeH2D;
119 double tHost_PCIeD2H;
123 gpuEvent_t *GemmStart, *GemmEnd, *ScatterEnd;
124 gpuEvent_t *ePCIeH2D;
125 gpuEvent_t *ePCIeD2H_Start;
126 gpuEvent_t *ePCIeD2H_End;
130 int_t first_l_block_gpu, first_u_block_gpu;
136 zLUstruct_gpu_t *A_gpu, *dA_gpu;
137 gpuStream_t funCallStreams[MAX_NGPU_STREAMS], CopyStream;
138 gpublasHandle_t gpublasHandles[MAX_NGPU_STREAMS];
139 int_t lastOffloadStream[MAX_NGPU_STREAMS];
150extern int zsparseTreeFactor_ASYNC_GPU(
163 int_t *gIperm_c_supno,
170 double thresh,
SCT_t *SCT,
int tag_ub,
184extern int zreduceGPUlu(
int last_flag,
d2Hreduce_t* d2Hred,
188extern int zwaitGPUscu(
int streamId, zsluGPU_t *sluGPU,
SCT_t *SCT);
189extern int zsendLUpanelGPU2HOST(
int_t k0,
d2Hreduce_t* d2Hred, zsluGPU_t *sluGPU);
190extern int zsendSCUdataHost2GPU(
192 int_t Remain_lbuf_send_size, zsluGPU_t *sluGPU,
HyP_t* HyP
195extern int zinitSluGPU3D_t(
201int zSchurCompUpdate_GPU(
205 int_t Remain_lbuf_send_size,
214extern void zCopyLUToGPU3D (
int* isNodeInMyGrid,
zLocalLU_t *A_host,
218extern int zreduceAllAncestors3d_GPU(
int_t ilvl,
int_t* myNodeCount,
224extern void zsyncAllfunCallStreams(zsluGPU_t* sluGPU,
SCT_t* SCT);
225extern int zfree_LUstruct_gpu (zLUstruct_gpu_t *A_gpu);
232void zprintGPUStats(zLUstruct_gpu_t *A_gpu);
int int_t
Definition: superlu_defs.h:114
double acc_async_cost
Definition: acc_aux.c:56
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:435
Definition: superlu_ddefs.h:329
Definition: superlu_defs.h:770
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: superlu_defs.h:760
Definition: superlu_defs.h:924
Definition: superlu_defs.h:852
Definition: dcomplex.h:30
Definition: superlu_defs.h:937
Definition: superlu_defs.h:839
Definition: superlu_defs.h:890
Definition: superlu_defs.h:398
Definition: superlu_defs.h:388
Definition: superlu_defs.h:815
Definition: superlu_defs.h:822
Definition: superlu_defs.h:947
Definition: superlu_ddefs.h:397
Definition: superlu_defs.h:901
Definition: superlu_defs.h:712
Definition: superlu_zdefs.h:357
Definition: superlu_zdefs.h:254
Definition: superlu_zdefs.h:97
Definition: superlu_zdefs.h:391
Definition: superlu_zdefs.h:385
Distributed SuperLU data types and function prototypes.