5#include "mapsampler_api.h"
31template <
typename Ftype>
73 printf(
"MPI tag upper bound = %d\n", tag_ub);
79 int_t *iperm_c_supno = trf3Dpartition->iperm_c_supno;
80 int_t *myNodeCount = trf3Dpartition->myNodeCount;
81 int_t *myTreeIdxs = trf3Dpartition->myTreeIdxs;
82 int_t *myZeroTrIdxs = trf3Dpartition->myZeroTrIdxs;
83 sForest_t **sForests = trf3Dpartition->sForests;
84 int_t **treePerm = trf3Dpartition->treePerm;
92 for (
int ilvl = 0; ilvl < maxLvl; ++ilvl)
94 if (sForests[myTreeIdxs[ilvl]] && sForests[myTreeIdxs[ilvl]]->topoInfo.eTreeTopLims[1] > mxLeafNode)
108 SCT, options, stat, thresh, info);
111 printf(
"Time to intialize New DS= %g\n", tConst);
114 MPI_Barrier(grid3d->
comm);
121 for (
int_t ilvl = 0; ilvl < maxLvl; ++ilvl)
124 if (!myZeroTrIdxs[ilvl])
127 sForest_t *sforest = sForests[myTreeIdxs[ilvl]];
134 if (superlu_acc_offload)
136 LU_packed.dsparseTreeFactorGPU(sforest, dFBufs,
150 if (ilvl < maxLvl - 1)
152 if (superlu_acc_offload)
157 LU_packed.checkGPU();
160 LU_packed.ancestorReduction3dGPU(ilvl, myNodeCount, treePerm);
162 LU_packed.checkGPU();
176 MPI_Barrier(grid3d->
comm);
182 if (superlu_acc_offload)
185 cudaStreamSynchronize(LU_packed.
A_gpu.cuStreams[0]);
186 LU_packed.copyLUGPUtoHost();
192 printf(
"Time to send data back= %g\n", tXferGpu2Host);
206 allinea_stop_sampling();
223template <
typename Ftype>
229 int_t *myNodeCount = trf3Dpartition->myNodeCount;
230 int_t *myTreeIdxs = trf3Dpartition->myTreeIdxs;
231 int_t *myZeroTrIdxs = trf3Dpartition->myZeroTrIdxs;
232 sForest_t **sForests = trf3Dpartition->sForests;
233 int_t **treePerm = trf3Dpartition->treePerm;
237 printf(
".maxLvl %d\n", maxLvl);
243 if (getenv(
"ANC25D"))
244 useAnc25D = atoi(getenv(
"ANC25D"));
246 printf(
"-- Using ANC25D; ONLY CPU supported \n");
248 for (
int ilvl = 0; ilvl < maxLvl; ++ilvl)
253 sForest_t *sforest = sForests[myTreeIdxs[ilvl]];
257 if (superlu_acc_offload)
259 printf(
"-- ANC25D on GPU is not working yet!!!!! \n");
261 dsparseTreeFactorGPU(sforest, dFBufs,
265 dAncestorFactorBaselineGPU(ilvl, sforest, dFBufs,
276 dAncestorFactorBaseline(ilvl, sforest, dFBufs,
283 sForests[myTreeIdxs[ilvl]]->
cost = SCT->tFactor3D[ilvl];
289 if (!myZeroTrIdxs[ilvl])
292 sForest_t *sforest = sForests[myTreeIdxs[ilvl]];
299 if ( superlu_acc_offload ) {
300 if ( options->batchCount==0 )
301 dsparseTreeFactorGPU(sforest, dFBufs, &gEtreeInfo, tag_ub);
303 printf(
"Batch ERROR: should not get to this branch!\n");
316 sForests[myTreeIdxs[ilvl]]->
cost = SCT->tFactor3D[ilvl];
319 if (ilvl < maxLvl - 1)
321 if (superlu_acc_offload)
329 ancestorReduction3dGPU(ilvl, myNodeCount, treePerm);
337 this->ancestorReduction3d(ilvl, myNodeCount, treePerm);
342 SCT->tSchCompUdt3d[ilvl] = ilvl == 0 ? SCT->NetSchurUpTimer
343 : SCT->NetSchurUpTimer - SCT->tSchCompUdt3d[ilvl - 1];
346 MPI_Barrier(grid3d->comm);
Descriptions and declarations for structures used in GPU.
typename std::conditional< std::is_same< Ftype, double >::value, dLocalLU_t, typename std::conditional< std::is_same< Ftype, float >::value, sLocalLU_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zLocalLU_t, void >::type >::type >::type LocalLU_type
Definition: luAuxStructTemplated.hpp:117
typename std::conditional< std::is_same< Ftype, double >::value, dLUstruct_t, typename std::conditional< std::is_same< Ftype, float >::value, sLUstruct_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zLUstruct_t, void >::type >::type >::type LUStruct_type
Definition: luAuxStructTemplated.hpp:102
typename std::conditional< std::is_same< Ftype, double >::value, dtrf3Dpartition_t, typename std::conditional< std::is_same< Ftype, float >::value, strf3Dpartition_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, ztrf3Dpartition_t, void >::type >::type >::type trf3dpartitionType
Definition: luAuxStructTemplated.hpp:87
typename std::conditional< std::is_same< Ftype, double >::value, dLUValSubBuf_t, typename std::conditional< std::is_same< Ftype, float >::value, sLUValSubBuf_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zLUValSubBuf_t, void >::type >::type >::type LUValSubBuf_type
Definition: luAuxStructTemplated.hpp:132
typename std::conditional< std::is_same< Ftype, float >::value, float, typename std::conditional< std::is_same< Ftype, double >::value||std::is_same< Ftype, doublecomplex >::value, double, float >::type >::type AnormType
Definition: luAuxStructTemplated.hpp:59
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
int_t pdgstrf3d_upacked(superlu_dist_options_t *options, int m, int n, AnormType< Ftype > anorm, trf3dpartitionType< Ftype > *trf3Dpartition, SCT_t *SCT, LUStruct_type< Ftype > *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info)
Definition: pdgstrf3d_upacked_impl.hpp:32
Definition: util_dist.h:199
double NetSchurUpTimer
Definition: util_dist.h:218
double tSchCompUdt3d[MAX_3D_LEVEL]
Definition: util_dist.h:307
double tStartup
Definition: util_dist.h:313
double tFactor3D[MAX_3D_LEVEL]
Definition: util_dist.h:306
double pdgstrfTimer
Definition: util_dist.h:257
Definition: util_dist.h:101
float peak_buffer
Definition: util_dist.h:110
float current_buffer
Definition: util_dist.h:109
float gpu_buffer
Definition: util_dist.h:111
flops_t * ops
Definition: util_dist.h:104
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
gridinfo_t grid2d
Definition: superlu_defs.h:419
superlu_scope_t zscp
Definition: superlu_defs.h:418
int iam
Definition: superlu_defs.h:420
MPI_Comm comm
Definition: superlu_defs.h:415
Definition: superlu_defs.h:404
int iam
Definition: superlu_defs.h:408
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
double cost
Definition: superlu_defs.h:1007
Definition: superlu_defs.h:728
int Np
Definition: superlu_defs.h:399
int Iam
Definition: superlu_defs.h:400
int_t * eTreeTopLims
Definition: superlu_defs.h:972
Definition: xlupanels.hpp:335
xLUstructGPU_t< Ftype > A_gpu
Definition: xlupanels.hpp:412
int_t packedU2skyline(LUStruct_type< Ftype > *LUstruct)
Definition: lupanels_impl.hpp:427
int_t dsparseTreeFactor(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:8
int_t pdgstrf3d()
Definition: pdgstrf3d_upacked_impl.hpp:224
int_t ancestorReduction3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm)
Definition: lupanels_comm3d_impl.hpp:9
Distributed SuperLU data types and function prototypes.
int getNsupers(int, Glu_persist_t *)
Definition: trfAux.c:42
ddiagFactBufs_t ** dinitDiagFactBufsArr(int mxLeafNode, int ldt, gridinfo_t *grid)
int_t dsparseTreeFactor(int_t nnodes, int_t *perm_c_supno, treeTopoInfo_t *treeTopoInfo, commRequests_t *comReqs, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, dLUValSubBuf_t *LUvsb, ddiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info)
int_t log2i(int_t index)
Definition: supernodal_etree.c:17
float smach_dist(const char *)
Definition: smach_dist.c:16
int_t reduceStat(PhaseType PHASE, SuperLUStat_t *stat, gridinfo3d_t *grid3d)
Definition: util.c:1366
int set_tag_ub(void)
Definition: trfAux.c:48
int sp_ienv_dist(int, superlu_dist_options_t *)
Definition: sp_ienv.c:80
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int * getIsNodeInMyGrid(int_t nsupers, int_t maxLvl, int_t *myNodeCount, int_t **treePerm)
Definition: supernodalForest.c:307
int getNumThreads(int)
Definition: trfAux.c:61
int64_t int_t
Definition: superlu_defs.h:119
@ FACT
Definition: superlu_enum_consts.h:74
double SuperLU_timer_()
Definition: superlu_timer.c:72
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56