5template <
typename Ftype>
20 CHECK_MALLOC(grid3d->iam,
"Enter dsparseTreeFactor_ASYNC()");
41 localNumChildrenLeft[
i]=0;
44 for(
int_t k0=0;k0<nnodes;k0++)
46 int_t k = perm_c_supno[k0];
48 int_t ik = myIperm[k_parent];
49 if(ik >-1 && ik<nnodes)
50 localNumChildrenLeft[ik]++;
55 int_t k_st = eTreeTopLims[topoLvl];
56 int_t k_end = eTreeTopLims[topoLvl + 1];
58 for (
int_t k0 = k_st; k0 < k_end; k0++)
60 int_t k = perm_c_supno[k0];
62 dDiagFactorPanelSolve(k, offset,dFBufs);
69 for (
int k0 = k_st; k0 < winSize; ++k0)
71 int_t k = perm_c_supno[k0];
72 int_t offset = k0%numLA;
73 if(!donePanelBcast[k0])
75 dPanelBcast(k, offset);
76 donePanelBcast[k0] =1;
82 int_t halfWin = numLA/2;
87 int_t k = perm_c_supno[k0];
88 int_t offset = (k0-k1)%winSize;
93 upanel_t k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]) ;
94 lpanel_t k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
96 k_upanel= uPanelVec[g2lRow(k)];
98 k_lpanel = lPanelVec[g2lCol(k)];
102 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
103 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
106 if(k_parent < nsupers)
108 int_t k0_parent = myIperm[k_parent];
109 if (k0_parent > 0 && k0_parent<nnodes)
111 localNumChildrenLeft[k0_parent]--;
112 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
115 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
116 donePanelSolve[k0_parent]=1;
122 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
123 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
128 for (
int_t k0_next = k1; k0_next <
SUPERLU_MIN(nnodes, k1+winSize); ++k0_next)
130 int k_next = perm_c_supno[k0_next];
131 if (!localNumChildrenLeft[k0_next])
133 int offset_next = (k0_next-k1)%winSize;
135 offset_next += halfWin;
136 dPanelBcast(k_next, offset_next);
137 donePanelBcast[k0_next] =1;
141 winSize = k0_next - k1;
150 for (
int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
152 int_t k_st = eTreeTopLims[topoLvl];
153 int_t k_end = eTreeTopLims[topoLvl + 1];
154 for (
int_t k0 = k_st; k0 < k_end; ++k0)
156 int_t k = perm_c_supno[k0];
157 int_t offset = k0%numLA;
160 upanel_t k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]) ;
161 lpanel_t k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
162 if (myrow == krow(k))
163 k_upanel= uPanelVec[g2lRow(k)];
164 if (mycol == kcol(k))
165 k_lpanel = lPanelVec[g2lCol(k)];
170 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
171 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
174 if(k_parent < nsupers)
176 int_t k0_parent = myIperm[k_parent];
177 if (k0_parent > 0 && k0_parent<nnodes)
179 localNumChildrenLeft[k0_parent]--;
180 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
183 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
184 donePanelSolve[k0_parent]=1;
189 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
190 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
192 for(
int_t k0_next=k0+1; k0_next<
SUPERLU_MIN(nnodes, k0+1+numLA); k0_next++ )
194 int k_next = perm_c_supno[k0_next];
195 if (!donePanelBcast[k0_next] &&
196 !localNumChildrenLeft[k0_next]
199 int offset_next = k0_next%numLA;
200 dPanelBcast(k_next, offset_next);
201 donePanelBcast[k0_next] =1;
211 #if (DEBUGlevel >= 1)
212 CHECK_MALLOC(grid3d->iam,
"Exit dsparseTreeFactor_ASYNC()");
217template <
typename Ftype>
231 CHECK_MALLOC(grid3d->iam,
"Enter dsparseTreeFactor_ASYNC()");
243 for (
int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
246 int_t k_st = eTreeTopLims[topoLvl];
247 int_t k_end = eTreeTopLims[topoLvl + 1];
248 for (
int_t k0 = k_st; k0 < k_end; ++k0)
250 int_t k = perm_c_supno[k0];
251 int_t offset = k0 - k_st;
254 if (iam == procIJ(k, k))
256 lPanelVec[g2lCol(k)].diagFactor(k, dFBufs[offset]->BlockUFactor, ksupc,
257 thresh, xsup, options, stat, info);
258 lPanelVec[g2lCol(k)].packDiagBlock(dFBufs[offset]->BlockLFactor, ksupc);
262 if (myrow == krow(k))
263 MPI_Bcast((
void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
264 MPI_DOUBLE, kcol(k), (grid->rscp).comm);
265 if (mycol == kcol(k))
266 MPI_Bcast((
void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
267 MPI_DOUBLE, krow(k), (grid->cscp).comm);
270 if (myrow == krow(k))
271 uPanelVec[g2lRow(k)].panelSolve(ksupc, dFBufs[offset]->BlockLFactor, ksupc);
273 if (mycol == kcol(k))
274 lPanelVec[g2lCol(k)].panelSolve(ksupc, dFBufs[offset]->BlockUFactor, ksupc);
279 if (myrow == krow(k))
281 k_upanel= uPanelVec[g2lRow(k)];
283 if (mycol == kcol(k))
284 k_lpanel = lPanelVec[g2lCol(k)];
286 if(UidxSendCounts[k]>0)
288 MPI_Bcast(k_upanel.index, UidxSendCounts[k],
mpi_int_t, krow(k), grid3d->cscp.comm);
289 MPI_Bcast(k_upanel.val, UvalSendCounts[k], MPI_DOUBLE, krow(k), grid3d->cscp.comm);
292 if(LidxSendCounts[k]>0)
294 MPI_Bcast(k_lpanel.index, LidxSendCounts[k],
mpi_int_t, kcol(k), grid3d->rscp.comm);
295 MPI_Bcast(k_lpanel.val, LvalSendCounts[k], MPI_DOUBLE, kcol(k), grid3d->rscp.comm);
300 #warning single node only
303 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
305 k_upanel.checkCorrectness();
306 dSchurComplementUpdate(k, k_lpanel, k_upanel);
318 CHECK_MALLOC(grid3d->iam,
"Exit dsparseTreeFactor_ASYNC()");
Definition: lupanels.hpp:19
Definition: lupanels.hpp:172
Definition: xlupanels.hpp:22
Definition: xlupanels.hpp:176
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
Definition: superlu_defs.h:978
int_t * setree
Definition: superlu_defs.h:979
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
int_t * nodeList
Definition: superlu_defs.h:992
int_t nNodes
Definition: superlu_defs.h:991
Definition: superlu_defs.h:970
int_t * myIperm
Definition: superlu_defs.h:973
int_t numLvl
Definition: superlu_defs.h:971
int_t * eTreeTopLims
Definition: superlu_defs.h:972
int_t dsparseTreeFactor(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:8
int_t dsparseTreeFactorBaseline(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:220
Distributed SuperLU data types and function prototypes.
int_t * intMalloc_dist(int_t)
Definition: memory.c:210
#define SuperSize(bnum)
Definition: superlu_defs.h:271
#define mpi_int_t
Definition: superlu_defs.h:120
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int64_t int_t
Definition: superlu_defs.h:119
int i
Definition: sutil_dist.c:287
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56
#define SUPERLU_MIN(x, y)
Definition: util_dist.h:64
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54