template <typename Ftype>
// ... (function signature and local declarations of dAncestorFactor_ASYNC elided)

    CHECK_MALLOC(grid3d->iam, "Enter dAncestorFactor_ASYNC()");

    // Process the elimination tree level by level.
    for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
    {
        int_t k_st = eTreeTopLims[topoLvl];
        int_t k_end = eTreeTopLims[topoLvl + 1];
        for (int_t k0 = k_st; k0 < k_end; ++k0)
        {
            int_t k = perm_c_supno[k0];
            int kRoot = anc25d.rootRank(k0, alvl);

            // Reduce the L panel onto the root of the ancestor-level
            // communicator; the owning rank contributes its copy in place.
            {
                void *sendBuf = (void *)lPanelVec[g2lCol(k)].gpuPanel.val;
                if (anc25d.rankHasGrid(k0, alvl))
                    sendBuf = MPI_IN_PLACE;
                MPI_Reduce(sendBuf, lPanelVec[g2lCol(k)].gpuPanel.val,
                           lPanelVec[g2lCol(k)].nzvalSize(), get_mpi_type<Ftype>(),
                           MPI_SUM, kRoot, anc25d.getComm(alvl));
            }

            // Same reduction for the U panel.
            {
                void *sendBuf = (void *)uPanelVec[g2lRow(k)].gpuPanel.val;
                if (anc25d.rankHasGrid(k0, alvl))
                    sendBuf = MPI_IN_PLACE;
                MPI_Reduce(sendBuf, uPanelVec[g2lRow(k)].gpuPanel.val,
                           uPanelVec[g2lRow(k)].nzvalSize(), get_mpi_type<Ftype>(),
                           MPI_SUM, kRoot, anc25d.getComm(alvl));
            }
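// --- Illustrative sketch (standalone; not part of dAncestorFactor_ASYNC) ------
// A minimal example of the in-place reduction pattern used above: the root
// rank passes MPI_IN_PLACE so its local panel is both an input to and the
// target of the element-wise sum, while every other rank supplies its panel
// as the send buffer. Only standard MPI is assumed; all names here are
// hypothetical.
#include <mpi.h>
#include <vector>

void reduce_panel_in_place(std::vector<double> &panel, int rootRank, MPI_Comm comm)
{
    int myRank;
    MPI_Comm_rank(comm, &myRank);

    // Non-root ranks send their panel; the root accumulates into its own copy.
    const void *sendBuf = (myRank == rootRank) ? MPI_IN_PLACE
                                               : (const void *)panel.data();
    MPI_Reduce(sendBuf, panel.data(), (int)panel.size(), MPI_DOUBLE,
               MPI_SUM, rootRank, comm);
    // On rootRank, panel now holds the sum over all ranks in comm; on the
    // other ranks the receive-buffer argument is ignored.
}
// --- end of sketch ------------------------------------------------------------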

            // Only ranks on the grid that owns this supernode at the current
            // ancestor level factor it.
            if (anc25d.rankHasGrid(k0, alvl))
            {
                int_t offset = k0 - k_st;
                // ...
                dDFactPSolveGPU(k, offset, dFBufs);
                // ...
                if (iam == procIJ(k, k))
                {
                    lPanelVec[g2lCol(k)].diagFactor(k, dFBufs[offset]->BlockUFactor, ksupc,
                                                    thresh, xsup, options, stat, info);
                    lPanelVec[g2lCol(k)].packDiagBlock(dFBufs[offset]->BlockLFactor, ksupc);
                }
                // ...
                // Broadcast the factored diagonal block along the owner's
                // process row and column.
                MPI_Bcast((void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
                          get_mpi_type<Ftype>(), kcol(k), (grid->rscp).comm);
                // ...
                MPI_Bcast((void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
                          get_mpi_type<Ftype>(), krow(k), (grid->cscp).comm);
                // ...
                // Triangular panel solves against the broadcast diagonal factors.
                uPanelVec[g2lRow(k)].panelSolve(ksupc, dFBufs[offset]->BlockLFactor, ksupc);
                // ...
                lPanelVec[g2lCol(k)].panelSolve(ksupc, dFBufs[offset]->BlockUFactor, ksupc);
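// --- Illustrative sketch (standalone; not part of dAncestorFactor_ASYNC) ------
// A minimal example of the 2D process-grid broadcast used for the diagonal
// factors above: ranks are arranged in an nprow x npcol grid, each rank derives
// a row communicator and a column communicator, and the block's owner
// broadcasts it along its row and its column. Only standard MPI is assumed;
// the grid layout and all names here are hypothetical.
#include <mpi.h>
#include <vector>

void bcast_diag_block(std::vector<double> &diagBlock, int npcol,
                      int ownerRow, int ownerCol, MPI_Comm gridComm)
{
    int myRank;
    MPI_Comm_rank(gridComm, &myRank);
    int myRow = myRank / npcol;   // row-major placement on the grid
    int myCol = myRank % npcol;

    // Split the grid communicator into per-row and per-column communicators.
    MPI_Comm rowComm, colComm;
    MPI_Comm_split(gridComm, myRow, myCol, &rowComm); // ranks sharing my row
    MPI_Comm_split(gridComm, myCol, myRow, &colComm); // ranks sharing my column

    // The owner sits at (ownerRow, ownerCol); within rowComm its rank is
    // ownerCol, within colComm its rank is ownerRow.
    if (myRow == ownerRow)
        MPI_Bcast(diagBlock.data(), (int)diagBlock.size(), MPI_DOUBLE,
                  ownerCol, rowComm);
    if (myCol == ownerCol)
        MPI_Bcast(diagBlock.data(), (int)diagBlock.size(), MPI_DOUBLE,
                  ownerRow, colComm);

    MPI_Comm_free(&rowComm);
    MPI_Comm_free(&colComm);
}
// --- end of sketch ------------------------------------------------------------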

                // k_upanel / k_lpanel wrap the broadcast receive buffers; their
                // constructor calls are truncated in this excerpt:
                //     ... A_gpu.UidxRecvBufs[0], A_gpu.UvalRecvBufs[0]);
                //     ... A_gpu.LidxRecvBufs[0], A_gpu.LvalRecvBufs[0]);
                // The owning row/column substitutes the local panels instead.
                if (myrow == krow(k))
                    k_upanel = uPanelVec[g2lRow(k)];
                if (mycol == kcol(k))
                    k_lpanel = lPanelVec[g2lCol(k)];

                // Broadcast the k-th U panel (index and values) down the process
                // column and the k-th L panel across the process row.
                if (UidxSendCounts[k] > 0)
                {
                    MPI_Bcast(k_upanel.gpuPanel.index, UidxSendCounts[k],
                              mpi_int_t, krow(k), grid3d->cscp.comm);
                    MPI_Bcast(k_upanel.gpuPanel.val, UvalSendCounts[k],
                              get_mpi_type<Ftype>(), krow(k), grid3d->cscp.comm);
                }
                if (LidxSendCounts[k] > 0)
                {
                    MPI_Bcast(k_lpanel.gpuPanel.index, LidxSendCounts[k],
                              mpi_int_t, kcol(k), grid3d->rscp.comm);
                    MPI_Bcast(k_lpanel.gpuPanel.val, LvalSendCounts[k],
                              get_mpi_type<Ftype>(), kcol(k), grid3d->rscp.comm);
                }
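// --- Illustrative sketch (standalone; not part of dAncestorFactor_ASYNC) ------
// One way a helper like get_mpi_type<Ftype>() can map the panel element type
// to an MPI datatype at compile time. The real definition in SuperLU_DIST may
// differ; the name below is deliberately different to mark it as hypothetical,
// and std::complex<double> stands in for SuperLU's doublecomplex type.
#include <mpi.h>
#include <complex>

template <typename T>
MPI_Datatype get_mpi_type_sketch();

template <> MPI_Datatype get_mpi_type_sketch<float>()  { return MPI_FLOAT; }
template <> MPI_Datatype get_mpi_type_sketch<double>() { return MPI_DOUBLE; }
template <> MPI_Datatype get_mpi_type_sketch<std::complex<double>>()
{
    return MPI_C_DOUBLE_COMPLEX; // complex case; a struct type would need MPI_Type_create_struct
}
// --- end of sketch ------------------------------------------------------------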

#warning single node only
                if (UidxSendCounts[k] > 0 && LidxSendCounts[k] > 0)
                {
                    k_upanel.checkCorrectness();
                    dSchurComplementUpdateGPU(/* ... */
                                              k, k_lpanel, k_upanel);
                }
            } // end if (anc25d.rankHasGrid(k0, alvl))

            // Broadcast the updated panels from the owning root back to every
            // rank on the ancestor-level communicator.
            if (mycol == kcol(k))
                MPI_Bcast(lPanelVec[g2lCol(k)].gpuPanel.val,
                          lPanelVec[g2lCol(k)].nzvalSize(), get_mpi_type<Ftype>(),
                          kRoot, anc25d.getComm(alvl));
            if (myrow == krow(k))
                MPI_Bcast(uPanelVec[g2lRow(k)].gpuPanel.val,
                          uPanelVec[g2lRow(k)].nzvalSize(), get_mpi_type<Ftype>(),
                          kRoot, anc25d.getComm(alvl));
        } // end for (k0)
    }     // end for (topoLvl)

    CHECK_MALLOC(grid3d->iam, "Exit dAncestorFactor_ASYNC()");