21#ifndef __SUPERLU_DIST_UTIL
22#define __SUPERLU_DIST_UTIL
/* Abort hook: passes msg unchanged to superlu_abort_and_exit_dist(). */
35#define USER_ABORT(msg) superlu_abort_and_exit_dist(msg)
38#define ABORT(err_msg) \
40 sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
/*
 * Memory hooks. The USER_* names bind to the superlu_*_dist routines;
 * the SUPERLU_* names simply forward to the USER_* names.
 */
/* Allocate `size` bytes via superlu_malloc_dist(). */
45#define USER_MALLOC(size) superlu_malloc_dist(size)
/* Forwards to USER_MALLOC. */
48#define SUPERLU_MALLOC(size) USER_MALLOC(size)
/* Release `addr` via superlu_free_dist(). */
51#define USER_FREE(addr) superlu_free_dist(addr)
/* Forwards to USER_FREE. */
54#define SUPERLU_FREE(addr) USER_FREE(addr)
56#define CHECK_MALLOC(pnum, where) { \
57 extern long int superlu_malloc_total; \
58 printf("(%d) %s: superlu_malloc_total (MB) %.6f\n", \
59 pnum, where, superlu_malloc_total*1e-6); \
/*
 * Larger / smaller of two values.
 * Each argument appears twice in the expansion, so an argument with side
 * effects (e.g. i++ or a function call) may be evaluated twice.
 */
63#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) )
64#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) )
/*
 * Typed array allocators: request (x) elements of the named type through
 * SUPERLU_MALLOC and cast the result to the matching pointer type.
 * The byte count (x) * sizeof(T) is not checked for overflow here.
 * NOTE(review): failure behaviour (NULL vs. abort) depends on the
 * allocator behind SUPERLU_MALLOC, which is not visible here - confirm.
 */
67#define MPI_REQ_ALLOC(x) ((MPI_Request *) SUPERLU_MALLOC ( (x) * sizeof (MPI_Request)))
68#define INT_T_ALLOC(x) ((int_t *) SUPERLU_MALLOC ( (x) * sizeof (int_t)))
69#define DOUBLE_ALLOC(x) ((double *) SUPERLU_MALLOC ( (x) * sizeof (double)))
/* NOTE(review): presumably an upper bound on the number of levels used by
 * the 3D algorithm's per-level arrays - confirm against its users. */
83#define MAX_3D_LEVEL 32
/* NOTE(review): the unit is not stated here; 8 is presumably a count of
 * words/doubles rather than bytes - confirm before relying on it. */
85#define CACHE_LINE_SIZE 8
109 float current_buffer;
112 int_t MaxActiveBTrees;
113 int_t MaxActiveRTrees;
116 double ScatterMOPCounter;
117 double ScatterMOPTimer;
118 double GemmFLOPCounter;
119 double GemmFLOPTimer;
123 double tHost_PCIeH2D;
124 double tHost_PCIeD2H;
128 gpuEvent_t *GemmStart, *GemmEnd, *ScatterEnd;
129 gpuEvent_t *ePCIeH2D;
130 gpuEvent_t *ePCIeD2H_Start;
131 gpuEvent_t *ePCIeD2H_End;
/* Evaluates to 5*n + 5.
 * NOTE(review): presumably the number of integer entries needed for the
 * global LU bookkeeping arrays of an n-column problem - confirm. */
152#define SuperLU_GluIntArray(n) (5 * (n) + 5)
/* NOTE(review): presumably the number of distinct memory-type tags;
 * confirm against the MemType enumeration. */
155#define SuperLU_NO_MEMTYPE 6
/*
 * Work-stack and alignment helpers.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (ternaries, shifts, bitwise ops, ...) are treated as a single operand;
 * previously `(long)addr & 7` applied the cast only to the first token of
 * the argument and `x + stack.used` could be split by lower-precedence
 * operators inside x.
 *
 * SuperLU_StackFull(x)      - nonzero when x more units would reach or
 *                             exceed stack.size; relies on a variable named
 *                             `stack` (with `used` and `size` members) being
 *                             in scope at the point of use.
 * SuperLU_NotDoubleAlign(a) - nonzero when the low three bits of a are set,
 *                             i.e. a is not a multiple of 8.
 * SuperLU_DoubleAlign(a)    - a rounded up to the next multiple of 8,
 *                             as a long.
 *
 * NOTE(review): the conversions go through `long`; on ABIs where long is
 * narrower than a pointer the rounded address would be truncated - confirm
 * the supported targets.
 */
#define SuperLU_StackFull(x)         ( (x) + stack.used >= stack.size )
#define SuperLU_NotDoubleAlign(addr) ( (long)(addr) & 7 )
#define SuperLU_DoubleAlign(addr)    ( ((long)(addr) + 7) & ~7L )
/*
 * Byte count of temporary work space:
 *   (2*w + 4 + NO_MARKER) * m ints  +  (w + 1) * n doubles.
 *
 * `n` and `w` are parenthesized so expression arguments such as `k + 1`
 * are multiplied as a whole (previously `2*w` and `(w + 1)*n` bound only
 * to the first/last token of the argument).
 *
 * Note that `m` is NOT a macro parameter: it is picked up from the scope
 * where the macro is used, as is NO_MARKER.
 */
#define SuperLU_TempSpace(n, w) ( (2*(w) + 4 + NO_MARKER)*m*sizeof(int) + \
                                  ((w) + 1)*(n)*sizeof(double) )
/*
 * Half of alpha, rounded up for non-negative integers: (alpha + 1) / 2.
 * The argument is parenthesized so that operators with lower precedence
 * than `+` inside it (shifts, bitwise ops, ternaries) are evaluated before
 * the increment.
 */
#define SuperLU_Reduce(alpha) (((alpha) + 1) / 2)
/* Entry i of an array named `xsup`, which must be in scope where the
 * macro is used (the name indicates the first column of supernode i). */
172#define SuperLU_FIRSTCOL_OF_SNODE(i) (xsup[i])
174#if defined(PROFlevel) && PROFlevel>=1
/*
 * Profiling timers built on SuperLU_timer_().
 *
 * TIC(t)       - store the current timer reading in t.
 * TOC(t2, t1)  - store in t2 the time elapsed since reading t1.
 *
 * t1 is parenthesized so that an expression argument such as `a + b` is
 * subtracted as a whole (previously it expanded to `timer - a + b`).
 * The assignment form is otherwise left as-is so existing statement-style
 * uses keep compiling unchanged.
 */
#define TIC(t)       t = SuperLU_timer_()
#define TOC(t2, t1)  t2 = SuperLU_timer_() - (t1)
/*
 * Shorthand accessors into the L and U storage structures.
 * They are not self-contained: each expands to a member access through a
 * pointer named `Lstore` or `Ustore`, which must be in scope where the
 * macro is used.
 */
/* Lstore->rowind_colptr[col] */
185#define SuperLU_L_SUB_START(col) ( Lstore->rowind_colptr[col] )
/* Lstore->rowind[ptr] */
186#define SuperLU_L_SUB(ptr) ( Lstore->rowind[ptr] )
/* Lstore->nzval_colptr[col] */
187#define SuperLU_L_NZ_START(col) ( Lstore->nzval_colptr[col] )
/* Lstore->sup_to_col[superno] */
188#define SuperLU_L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] )
/* Ustore->colptr[col] */
189#define SuperLU_U_NZ_START(col) ( Ustore->colptr[col] )
/* Ustore->rowind[ptr] */
190#define SuperLU_U_SUB(ptr) ( Ustore->rowind[ptr] )
200 int_t datatransfer_count;
201 int_t schurPhiCallCount;
202 int_t PhiMemCpyCounter;
203 double acc_load_imbal;
204 double LookAheadGEMMFlOp;
205 double PhiWaitTimer_2;
206 double LookAheadGEMMTimer;
207 double LookAheadRowSepTimer;
208 double LookAheadScatterTimer;
211 double scatter_mem_op_counter;
212 double LookAheadRowSepMOP ;
213 double scatter_mem_op_timer;
214 double schur_flop_counter;
215 double schur_flop_timer;
216 double CPUOffloadTimer;
218 double NetSchurUpTimer;
219 double AssemblyTimer;
220 double PhiMemCpyTimer;
221 double datatransfer_timer;
222 double LookAheadScatterMOP;
223 double schurPhiCallTimer;
225 double *Predicted_acc_sch_time;
226 double *Predicted_acc_gemm_time;
227 double *Predicted_acc_scatter_time;
231 double offloadable_flops;
232 double offloadable_mops;
234 double *SchurCompUdtThreadTime;
235 double *Predicted_host_sch_time;
236 double *Measured_host_sch_time;
238#ifdef SCATTER_PROFILE
239 double *Host_TheadScatterMOP ;
240 double *Host_TheadScatterTimer;
243#ifdef OFFLOAD_PROFILE
244 double *Predicted_acc_scatter_time_strat1;
245 double *Predicted_host_sch_time_strat1;
246 size_t pci_transfer_count[18];
247 double pci_transfer_time[18];
248 double pci_transfer_prediction_error[18];
253 double pdgstrs2_timer;
254 double pdgstrf2_timer;
255 double lookaheadupdatetimer;
264 double *Local_Dgstrf2_Thread_tl;
266 double Wait_UDiagBlock_Recv_tl;
268 double Wait_LDiagBlock_Recv_tl;
272 double Recv_UDiagBlock_tl;
274 double Wait_UDiagBlockSend_tl;
276 double L_PanelUpdate_tl;
278 double Bcast_UPanel_tl;
279 double Bcast_LPanel_tl;
281 double Wait_LSend_tl;
284 double Wait_USend_tl;
286 double Wait_URecv_tl;
288 double Wait_LRecv_tl;
291 double *GetAijLock_Thread_tl;
297 double Phase_Factor_tl;
298 double Phase_LU_Update_tl;
299 double Phase_SC_Update_tl;
303 double gatherLUtimer;
308 double tAsyncPipeTail;
314 double commVolFactor;
int int_t
Definition: superlu_defs.h:114
unsigned char Logical
Definition: util_dist.h:87
float flops_t
Definition: util_dist.h:86
struct e_node SuperLU_ExpHeader
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: util_dist.h:116
Definition: util_dist.h:111
int size
Definition: util_dist.h:112
void * mem
Definition: util_dist.h:113
enum constants header file
#define CBLOCK
Definition: util_dist.h:84
#define MAX_3D_LEVEL
Definition: util_dist.h:83
#define CSTEPPING
Definition: util_dist.h:86