SuperLU Distributed 9.0.0
gpu3d
dsparseTreeFactor_upacked_impl.hpp
Go to the documentation of this file.
1#pragma once
2#include "superlu_ddefs.h"
3#include "lupanels.hpp"
4#include "superlu_blas.hpp"
6
7template <typename Ftype>
9 sForest_t *sforest,
10 diagFactBufs_type<Ftype>**dFBufs, // size maxEtree level
11 gEtreeInfo_t *gEtreeInfo, // global etree info
12 int tag_ub)
13{
14
15 int_t nnodes = sforest->nNodes; // number of nodes in the tree
16 if (nnodes < 1)
17 {
18 return 1;
19 }
20
21#if (DEBUGlevel >= 1)
22 CHECK_MALLOC(grid3d->iam, "Enter dsparseTreeFactor_ASYNC()");
23#endif
24
25 int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
26 treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
27 int_t *myIperm = treeTopoInfo->myIperm;
28 int_t maxTopoLevel = treeTopoInfo->numLvl;
29 int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;
30
31 /*main loop over all the levels*/
32 int_t numLA = getNumLookAhead(options);
33
34 int_t* donePanelBcast = intMalloc_dist(nnodes);
35 int_t* donePanelSolve = intMalloc_dist(nnodes);
36 int_t* localNumChildrenLeft = intMalloc_dist(nnodes);
37
38 //TODO: not needed, remove after testing
39 for(int_t i=0;i<nnodes;i++)
40 {
41 donePanelBcast[i] =0;
42 donePanelSolve[i]=0;
43 localNumChildrenLeft[i]=0;
44 }
45
46 for(int_t k0=0;k0<nnodes;k0++)
47 {
48 int_t k = perm_c_supno[k0];
49 int_t k_parent = gEtreeInfo->setree[k];
50 int_t ik = myIperm[k_parent];
51 if(ik >-1 && ik<nnodes)
52 localNumChildrenLeft[ik]++;
53 }
54
55 // start the pipeline
56 int_t topoLvl =0;
57 int_t k_st = eTreeTopLims[topoLvl];
58 int_t k_end = eTreeTopLims[topoLvl + 1];
59 //TODO: make this asynchronous
60 for (int_t k0 = k_st; k0 < k_end; k0++)
61 {
62 int_t k = perm_c_supno[k0];
63 int_t offset = 0;
64 dDiagFactorPanelSolve(k, offset,dFBufs);
65 donePanelSolve[k0]=1;
66 }
67
68 //TODO: its really the panels that needs to be doubled
69 // everything else can remain as it is
70 int_t winSize = SUPERLU_MIN(numLA/2, eTreeTopLims[1]);
71 for (int k0 = k_st; k0 < winSize; ++k0)
72 {
73 int_t k = perm_c_supno[k0];
74 int_t offset = k0%numLA;
75 if(!donePanelBcast[k0])
76 {
77 dPanelBcast(k, offset);
78 donePanelBcast[k0] =1;
79 }
80 }/*for (int k0 = k_st; k0 < SUPERLU_MIN(k_end, k_st + numLA); ++k0)*/
81
82 int_t k1 =0;
83 int_t winParity=0;
84 int_t halfWin = numLA/2;
85 while(k1<nnodes)
86 {
87 for (int_t k0 = k1; k0 < SUPERLU_MIN(nnodes, k1+winSize); ++k0)
88 {
89 int_t k = perm_c_supno[k0];
90 int_t offset = (k0-k1)%winSize;
91 if(winParity%2)
92 offset+= halfWin; //
93
94 /*======= SchurComplement Update ======*/
95 xupanel_t<Ftype> k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]) ;
96 xlpanel_t<Ftype> k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
97 if (myrow == krow(k))
98 k_upanel= uPanelVec[g2lRow(k)];
99 if (mycol == kcol(k))
100 k_lpanel = lPanelVec[g2lCol(k)];
101
102 int_t k_parent = gEtreeInfo->setree[k];
103 /* Look Ahead Panel Update */
104 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
105 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
106
107 /* Look Ahead Panel Solve */
108 if(k_parent < nsupers)
109 {
110 int_t k0_parent = myIperm[k_parent];
111 if (k0_parent > 0 && k0_parent<nnodes)
112 {
113 localNumChildrenLeft[k0_parent]--;
114 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
115 {
116 int_t dOffset = 0;
117 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
118 donePanelSolve[k0_parent]=1;
119 }
120 }
121 }
122
123 /*proceed with remaining SchurComplement update */
124 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
125 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
126
127 }
128
129 k1 = k1+winSize;
130 for (int_t k0_next = k1; k0_next < SUPERLU_MIN(nnodes, k1+winSize); ++k0_next)
131 {
132 int k_next = perm_c_supno[k0_next];
133 if (!localNumChildrenLeft[k0_next])
134 {
135 int offset_next = (k0_next-k1)%winSize;
136 if(!(winParity%2))
137 offset_next += halfWin;
138 dPanelBcast(k_next, offset_next);
139 donePanelBcast[k0_next] =1;
140 }
141 else
142 {
143 winSize = k0_next - k1;
144 break;
145 }
146 }
147
148 winParity++;
149 }
150
151#if 0
152 for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
153 {
154 int_t k_st = eTreeTopLims[topoLvl];
155 int_t k_end = eTreeTopLims[topoLvl + 1];
156 for (int_t k0 = k_st; k0 < k_end; ++k0)
157 {
158 int_t k = perm_c_supno[k0];
159 int_t offset = k0%numLA;
160
161 /*======= SchurComplement Update ======*/
162 xupanel_t<Ftype> k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]) ;
163 xlpanel_t<Ftype> k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
164 if (myrow == krow(k))
165 k_upanel= uPanelVec[g2lRow(k)];
166 if (mycol == kcol(k))
167 k_lpanel = lPanelVec[g2lCol(k)];
168
169 int_t k_parent = gEtreeInfo->setree[k];
170
171 /* Look Ahead Panel Update */
172 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
173 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
174
175 /* Look Ahead Panel Solve */
176 if(k_parent < nsupers)
177 {
178 int_t k0_parent = myIperm[k_parent];
179 if (k0_parent > 0 && k0_parent<nnodes)
180 {
181 localNumChildrenLeft[k0_parent]--;
182 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
183 {
184 int_t dOffset = 0;
185 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
186 donePanelSolve[k0_parent]=1;
187 }
188 }
189 }
190 /*proceed with remaining SchurComplement update */
191 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
192 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
193 /*======= Look ahead Panel Bcast ======*/
194 for(int_t k0_next=k0+1; k0_next<SUPERLU_MIN(nnodes, k0+1+numLA); k0_next++ )
195 {
196 int k_next = perm_c_supno[k0_next];
197 if (!donePanelBcast[k0_next] &&
198 !localNumChildrenLeft[k0_next]
199 )
200 {
201 int offset_next = k0_next%numLA;
202 dPanelBcast(k_next, offset_next);
203 donePanelBcast[k0_next] =1;
204 }
205 }
206 } /*for k0= k_st:k_end */
207 } /*for topoLvl = 0:maxTopoLevel*/
208 #endif
209
210 SUPERLU_FREE( donePanelBcast);
211 SUPERLU_FREE( donePanelSolve);
212 SUPERLU_FREE(localNumChildrenLeft);
213 #if (DEBUGlevel >= 1)
214 CHECK_MALLOC(grid3d->iam, "Exit dsparseTreeFactor_ASYNC()");
215 #endif
216 return 0;
217}
218
219template <typename Ftype>
221 sForest_t *sforest,
222 diagFactBufs_type<Ftype>**dFBufs, // size maxEtree level
223 gEtreeInfo_t *gEtreeInfo, // global etree info
224 int tag_ub)
225{
226 int_t nnodes = sforest->nNodes; // number of nodes in the tree
227 if (nnodes < 1)
228 {
229 return 1;
230 }
231
232#if (DEBUGlevel >= 1)
233 CHECK_MALLOC(grid3d->iam, "Enter dsparseTreeFactor_ASYNC()");
234#endif
235
236 int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
237 treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
238 int_t *myIperm = treeTopoInfo->myIperm;
239 int_t maxTopoLevel = treeTopoInfo->numLvl;
240 int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;
241
242 /*main loop over all the levels*/
243 int_t numLA = getNumLookAhead(options);
244
245 for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
246 {
247 /* code */
248 int_t k_st = eTreeTopLims[topoLvl];
249 int_t k_end = eTreeTopLims[topoLvl + 1];
250 for (int_t k0 = k_st; k0 < k_end; ++k0)
251 {
252 int_t k = perm_c_supno[k0];
253 int_t offset = k0 - k_st;
254 int_t ksupc = SuperSize(k);
255 /*======= Diagonal Factorization ======*/
256 if (iam == procIJ(k, k))
257 {
258 lPanelVec[g2lCol(k)].diagFactor(k, dFBufs[offset]->BlockUFactor, ksupc,
259 thresh, xsup, options, stat, info);
260 lPanelVec[g2lCol(k)].packDiagBlock(dFBufs[offset]->BlockLFactor, ksupc);
261 }
262
263 /*======= Diagonal Broadcast ======*/
264 if (myrow == krow(k))
265 MPI_Bcast((void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
266 get_mpi_type<Ftype>(), kcol(k), (grid->rscp).comm);
267 if (mycol == kcol(k))
268 MPI_Bcast((void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
269 get_mpi_type<Ftype>(), krow(k), (grid->cscp).comm);
270
271 /*======= Panel Update ======*/
272 if (myrow == krow(k))
273 uPanelVec[g2lRow(k)].panelSolve(ksupc, dFBufs[offset]->BlockLFactor, ksupc);
274
275 if (mycol == kcol(k))
276 lPanelVec[g2lCol(k)].panelSolve(ksupc, dFBufs[offset]->BlockUFactor, ksupc);
277
278 /*======= Panel Broadcast ======*/
279 xupanel_t<Ftype> k_upanel(UidxRecvBufs[0], UvalRecvBufs[0]) ;
280 xlpanel_t<Ftype> k_lpanel(LidxRecvBufs[0], LvalRecvBufs[0]);
281 if (myrow == krow(k))
282 {
283 k_upanel= uPanelVec[g2lRow(k)];
284 }
285 if (mycol == kcol(k))
286 k_lpanel = lPanelVec[g2lCol(k)];
287
288 if(UidxSendCounts[k]>0)
289 {
290 MPI_Bcast(k_upanel.index, UidxSendCounts[k], mpi_int_t, krow(k), grid3d->cscp.comm);
291 MPI_Bcast(k_upanel.val, UvalSendCounts[k], get_mpi_type<Ftype>(), krow(k), grid3d->cscp.comm);
292 }
293
294 if(LidxSendCounts[k]>0)
295 {
296 MPI_Bcast(k_lpanel.index, LidxSendCounts[k], mpi_int_t, kcol(k), grid3d->rscp.comm);
297 MPI_Bcast(k_lpanel.val, LvalSendCounts[k], get_mpi_type<Ftype>(), kcol(k), grid3d->rscp.comm);
298 }
299
300
301 /*======= Schurcomplement Update ======*/
302 #warning single node only
303 // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], uPanelVec[g2lRow(k)]);
304 // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], k_upanel);
305 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
306 {
307 k_upanel.checkCorrectness();
308 dSchurComplementUpdate(k, k_lpanel, k_upanel);
309
310 }
311 // MPI_Barrier(grid3d->comm);
312
313 } /*for k0= k_st:k_end */
314
315 } /*for topoLvl = 0:maxTopoLevel*/
316
317
318
319#if (DEBUGlevel >= 1)
320 CHECK_MALLOC(grid3d->iam, "Exit dsparseTreeFactor_ASYNC()");
321#endif
322
323 return 0;
324} /* dsparseTreeFactor_ASYNC */
Definition: xlupanels.hpp:22
int_t * index
Definition: xlupanels.hpp:24
Ftype * val
Definition: xlupanels.hpp:25
Definition: xlupanels.hpp:176
int_t * index
Definition: xlupanels.hpp:178
int_t checkCorrectness()
Definition: xlupanels.hpp:276
Ftype * val
Definition: xlupanels.hpp:179
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
Definition: superlu_defs.h:978
int_t * setree
Definition: superlu_defs.h:979
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
int_t * nodeList
Definition: superlu_defs.h:992
int_t nNodes
Definition: superlu_defs.h:991
Definition: superlu_defs.h:970
int_t * myIperm
Definition: superlu_defs.h:973
int_t numLvl
Definition: superlu_defs.h:971
int_t * eTreeTopLims
Definition: superlu_defs.h:972
int_t dsparseTreeFactor(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:8
int_t dsparseTreeFactorBaseline(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:220
Distributed SuperLU data types and function prototypes.
int_t * intMalloc_dist(int_t)
Definition: memory.c:210
#define SuperSize(bnum)
Definition: superlu_defs.h:271
#define mpi_int_t
Definition: superlu_defs.h:120
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int64_t int_t
Definition: superlu_defs.h:119
int i
Definition: sutil_dist.c:287
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56
#define SUPERLU_MIN(x, y)
Definition: util_dist.h:64
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54