7template <
typename Ftype>
22 CHECK_MALLOC(grid3d->iam,
"Enter dsparseTreeFactor_ASYNC()");
43 localNumChildrenLeft[
i]=0;
46 for(
int_t k0=0;k0<nnodes;k0++)
48 int_t k = perm_c_supno[k0];
50 int_t ik = myIperm[k_parent];
51 if(ik >-1 && ik<nnodes)
52 localNumChildrenLeft[ik]++;
57 int_t k_st = eTreeTopLims[topoLvl];
58 int_t k_end = eTreeTopLims[topoLvl + 1];
60 for (
int_t k0 = k_st; k0 < k_end; k0++)
62 int_t k = perm_c_supno[k0];
64 dDiagFactorPanelSolve(k, offset,dFBufs);
71 for (
int k0 = k_st; k0 < winSize; ++k0)
73 int_t k = perm_c_supno[k0];
74 int_t offset = k0%numLA;
75 if(!donePanelBcast[k0])
77 dPanelBcast(k, offset);
78 donePanelBcast[k0] =1;
84 int_t halfWin = numLA/2;
89 int_t k = perm_c_supno[k0];
90 int_t offset = (k0-k1)%winSize;
98 k_upanel= uPanelVec[g2lRow(k)];
100 k_lpanel = lPanelVec[g2lCol(k)];
104 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
105 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
108 if(k_parent < nsupers)
110 int_t k0_parent = myIperm[k_parent];
111 if (k0_parent > 0 && k0_parent<nnodes)
113 localNumChildrenLeft[k0_parent]--;
114 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
117 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
118 donePanelSolve[k0_parent]=1;
124 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
125 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
130 for (
int_t k0_next = k1; k0_next <
SUPERLU_MIN(nnodes, k1+winSize); ++k0_next)
132 int k_next = perm_c_supno[k0_next];
133 if (!localNumChildrenLeft[k0_next])
135 int offset_next = (k0_next-k1)%winSize;
137 offset_next += halfWin;
138 dPanelBcast(k_next, offset_next);
139 donePanelBcast[k0_next] =1;
143 winSize = k0_next - k1;
152 for (
int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
154 int_t k_st = eTreeTopLims[topoLvl];
155 int_t k_end = eTreeTopLims[topoLvl + 1];
156 for (
int_t k0 = k_st; k0 < k_end; ++k0)
158 int_t k = perm_c_supno[k0];
159 int_t offset = k0%numLA;
164 if (myrow == krow(k))
165 k_upanel= uPanelVec[g2lRow(k)];
166 if (mycol == kcol(k))
167 k_lpanel = lPanelVec[g2lCol(k)];
172 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
173 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
176 if(k_parent < nsupers)
178 int_t k0_parent = myIperm[k_parent];
179 if (k0_parent > 0 && k0_parent<nnodes)
181 localNumChildrenLeft[k0_parent]--;
182 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
185 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
186 donePanelSolve[k0_parent]=1;
191 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
192 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
194 for(
int_t k0_next=k0+1; k0_next<
SUPERLU_MIN(nnodes, k0+1+numLA); k0_next++ )
196 int k_next = perm_c_supno[k0_next];
197 if (!donePanelBcast[k0_next] &&
198 !localNumChildrenLeft[k0_next]
201 int offset_next = k0_next%numLA;
202 dPanelBcast(k_next, offset_next);
203 donePanelBcast[k0_next] =1;
213 #if (DEBUGlevel >= 1)
214 CHECK_MALLOC(grid3d->iam,
"Exit dsparseTreeFactor_ASYNC()");
219template <
typename Ftype>
233 CHECK_MALLOC(grid3d->iam,
"Enter dsparseTreeFactor_ASYNC()");
245 for (
int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
248 int_t k_st = eTreeTopLims[topoLvl];
249 int_t k_end = eTreeTopLims[topoLvl + 1];
250 for (
int_t k0 = k_st; k0 < k_end; ++k0)
252 int_t k = perm_c_supno[k0];
253 int_t offset = k0 - k_st;
256 if (iam == procIJ(k, k))
258 lPanelVec[g2lCol(k)].diagFactor(k, dFBufs[offset]->BlockUFactor, ksupc,
259 thresh, xsup, options, stat, info);
260 lPanelVec[g2lCol(k)].packDiagBlock(dFBufs[offset]->BlockLFactor, ksupc);
264 if (myrow == krow(k))
265 MPI_Bcast((
void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
266 get_mpi_type<Ftype>(), kcol(k), (grid->rscp).comm);
267 if (mycol == kcol(k))
268 MPI_Bcast((
void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
269 get_mpi_type<Ftype>(), krow(k), (grid->cscp).comm);
272 if (myrow == krow(k))
273 uPanelVec[g2lRow(k)].panelSolve(ksupc, dFBufs[offset]->BlockLFactor, ksupc);
275 if (mycol == kcol(k))
276 lPanelVec[g2lCol(k)].panelSolve(ksupc, dFBufs[offset]->BlockUFactor, ksupc);
281 if (myrow == krow(k))
283 k_upanel= uPanelVec[g2lRow(k)];
285 if (mycol == kcol(k))
286 k_lpanel = lPanelVec[g2lCol(k)];
288 if(UidxSendCounts[k]>0)
290 MPI_Bcast(k_upanel.
index, UidxSendCounts[k],
mpi_int_t, krow(k), grid3d->cscp.comm);
291 MPI_Bcast(k_upanel.
val, UvalSendCounts[k], get_mpi_type<Ftype>(), krow(k), grid3d->cscp.comm);
294 if(LidxSendCounts[k]>0)
296 MPI_Bcast(k_lpanel.
index, LidxSendCounts[k],
mpi_int_t, kcol(k), grid3d->rscp.comm);
297 MPI_Bcast(k_lpanel.
val, LvalSendCounts[k], get_mpi_type<Ftype>(), kcol(k), grid3d->rscp.comm);
302 #warning single node only
305 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
308 dSchurComplementUpdate(k, k_lpanel, k_upanel);
320 CHECK_MALLOC(grid3d->iam,
"Exit dsparseTreeFactor_ASYNC()");
Definition: xlupanels.hpp:22
int_t * index
Definition: xlupanels.hpp:24
Ftype * val
Definition: xlupanels.hpp:25
Definition: xlupanels.hpp:176
int_t * index
Definition: xlupanels.hpp:178
int_t checkCorrectness()
Definition: xlupanels.hpp:276
Ftype * val
Definition: xlupanels.hpp:179
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
Definition: superlu_defs.h:978
int_t * setree
Definition: superlu_defs.h:979
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
int_t * nodeList
Definition: superlu_defs.h:992
int_t nNodes
Definition: superlu_defs.h:991
Definition: superlu_defs.h:970
int_t * myIperm
Definition: superlu_defs.h:973
int_t numLvl
Definition: superlu_defs.h:971
int_t * eTreeTopLims
Definition: superlu_defs.h:972
int_t dsparseTreeFactor(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:8
int_t dsparseTreeFactorBaseline(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:220
Distributed SuperLU data types and function prototypes.
int_t * intMalloc_dist(int_t)
Definition: memory.c:210
#define SuperSize(bnum)
Definition: superlu_defs.h:271
#define mpi_int_t
Definition: superlu_defs.h:120
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int64_t int_t
Definition: superlu_defs.h:119
int i
Definition: sutil_dist.c:287
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56
#define SUPERLU_MIN(x, y)
Definition: util_dist.h:64
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54