SuperLU Distributed 9.0.0
gpu3d
sparseTreeFactor_impl.hpp
Go to the documentation of this file.
1#pragma once
2#include "superlu_ddefs.h"
3// #include "lupanels.hpp"
4
5template <typename Ftype>
7 sForest_t *sforest,
8 diagFactBufs_type<Ftype>**dFBufs, // size maxEtree level
9 gEtreeInfo_t *gEtreeInfo, // global etree info
10 int tag_ub)
11{
12
13 int_t nnodes = sforest->nNodes; // number of nodes in the tree
14 if (nnodes < 1)
15 {
16 return 1;
17 }
18
19#if (DEBUGlevel >= 1)
20 CHECK_MALLOC(grid3d->iam, "Enter dsparseTreeFactor_ASYNC()");
21#endif
22
23 int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
24 treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
25 int_t *myIperm = treeTopoInfo->myIperm;
26 int_t maxTopoLevel = treeTopoInfo->numLvl;
27 int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;
28
29 /*main loop over all the levels*/
30 int_t numLA = getNumLookAhead(options);
31
32 int_t* donePanelBcast = intMalloc_dist(nnodes);
33 int_t* donePanelSolve = intMalloc_dist(nnodes);
34 int_t* localNumChildrenLeft = intMalloc_dist(nnodes);
35
36 //TODO: not needed, remove after testing
37 for(int_t i=0;i<nnodes;i++)
38 {
39 donePanelBcast[i] =0;
40 donePanelSolve[i]=0;
41 localNumChildrenLeft[i]=0;
42 }
43
44 for(int_t k0=0;k0<nnodes;k0++)
45 {
46 int_t k = perm_c_supno[k0];
47 int_t k_parent = gEtreeInfo->setree[k];
48 int_t ik = myIperm[k_parent];
49 if(ik >-1 && ik<nnodes)
50 localNumChildrenLeft[ik]++;
51 }
52
53 // start the pipeline
54 int_t topoLvl =0;
55 int_t k_st = eTreeTopLims[topoLvl];
56 int_t k_end = eTreeTopLims[topoLvl + 1];
57 //TODO: make this asynchronous
58 for (int_t k0 = k_st; k0 < k_end; k0++)
59 {
60 int_t k = perm_c_supno[k0];
61 int_t offset = 0;
62 dDiagFactorPanelSolve(k, offset,dFBufs);
63 donePanelSolve[k0]=1;
64 }
65
66 //TODO: its really the panels that needs to be doubled
67 // everything else can remain as it is
68 int_t winSize = SUPERLU_MIN(numLA/2, eTreeTopLims[1]);
69 for (int k0 = k_st; k0 < winSize; ++k0)
70 {
71 int_t k = perm_c_supno[k0];
72 int_t offset = k0%numLA;
73 if(!donePanelBcast[k0])
74 {
75 dPanelBcast(k, offset);
76 donePanelBcast[k0] =1;
77 }
78 }/*for (int k0 = k_st; k0 < SUPERLU_MIN(k_end, k_st + numLA); ++k0)*/
79
80 int_t k1 =0;
81 int_t winParity=0;
82 int_t halfWin = numLA/2;
83 while(k1<nnodes)
84 {
85 for (int_t k0 = k1; k0 < SUPERLU_MIN(nnodes, k1+winSize); ++k0)
86 {
87 int_t k = perm_c_supno[k0];
88 int_t offset = (k0-k1)%winSize;
89 if(winParity%2)
90 offset+= halfWin; //
91
92 /*======= SchurComplement Update ======*/
93 upanel_t k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]) ;
94 lpanel_t k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
95 if (myrow == krow(k))
96 k_upanel= uPanelVec[g2lRow(k)];
97 if (mycol == kcol(k))
98 k_lpanel = lPanelVec[g2lCol(k)];
99
100 int_t k_parent = gEtreeInfo->setree[k];
101 /* Look Ahead Panel Update */
102 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
103 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
104
105 /* Look Ahead Panel Solve */
106 if(k_parent < nsupers)
107 {
108 int_t k0_parent = myIperm[k_parent];
109 if (k0_parent > 0 && k0_parent<nnodes)
110 {
111 localNumChildrenLeft[k0_parent]--;
112 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
113 {
114 int_t dOffset = 0;
115 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
116 donePanelSolve[k0_parent]=1;
117 }
118 }
119 }
120
121 /*proceed with remaining SchurComplement update */
122 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
123 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
124
125 }
126
127 k1 = k1+winSize;
128 for (int_t k0_next = k1; k0_next < SUPERLU_MIN(nnodes, k1+winSize); ++k0_next)
129 {
130 int k_next = perm_c_supno[k0_next];
131 if (!localNumChildrenLeft[k0_next])
132 {
133 int offset_next = (k0_next-k1)%winSize;
134 if(!(winParity%2))
135 offset_next += halfWin;
136 dPanelBcast(k_next, offset_next);
137 donePanelBcast[k0_next] =1;
138 }
139 else
140 {
141 winSize = k0_next - k1;
142 break;
143 }
144 }
145
146 winParity++;
147 }
148
149#if 0
150 for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
151 {
152 int_t k_st = eTreeTopLims[topoLvl];
153 int_t k_end = eTreeTopLims[topoLvl + 1];
154 for (int_t k0 = k_st; k0 < k_end; ++k0)
155 {
156 int_t k = perm_c_supno[k0];
157 int_t offset = k0%numLA;
158
159 /*======= SchurComplement Update ======*/
160 upanel_t k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]) ;
161 lpanel_t k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
162 if (myrow == krow(k))
163 k_upanel= uPanelVec[g2lRow(k)];
164 if (mycol == kcol(k))
165 k_lpanel = lPanelVec[g2lCol(k)];
166
167 int_t k_parent = gEtreeInfo->setree[k];
168
169 /* Look Ahead Panel Update */
170 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
171 lookAheadUpdate(k,k_parent, k_lpanel,k_upanel);
172
173 /* Look Ahead Panel Solve */
174 if(k_parent < nsupers)
175 {
176 int_t k0_parent = myIperm[k_parent];
177 if (k0_parent > 0 && k0_parent<nnodes)
178 {
179 localNumChildrenLeft[k0_parent]--;
180 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
181 {
182 int_t dOffset = 0;
183 dDiagFactorPanelSolve(k_parent, dOffset,dFBufs);
184 donePanelSolve[k0_parent]=1;
185 }
186 }
187 }
188 /*proceed with remaining SchurComplement update */
189 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
190 dSchurCompUpdateExcludeOne(k,k_parent, k_lpanel,k_upanel);
191 /*======= Look ahead Panel Bcast ======*/
192 for(int_t k0_next=k0+1; k0_next<SUPERLU_MIN(nnodes, k0+1+numLA); k0_next++ )
193 {
194 int k_next = perm_c_supno[k0_next];
195 if (!donePanelBcast[k0_next] &&
196 !localNumChildrenLeft[k0_next]
197 )
198 {
199 int offset_next = k0_next%numLA;
200 dPanelBcast(k_next, offset_next);
201 donePanelBcast[k0_next] =1;
202 }
203 }
204 } /*for k0= k_st:k_end */
205 } /*for topoLvl = 0:maxTopoLevel*/
206 #endif
207
208 SUPERLU_FREE( donePanelBcast);
209 SUPERLU_FREE( donePanelSolve);
210 SUPERLU_FREE(localNumChildrenLeft);
211 #if (DEBUGlevel >= 1)
212 CHECK_MALLOC(grid3d->iam, "Exit dsparseTreeFactor_ASYNC()");
213 #endif
214
215}
216
217template <typename Ftype>
219 sForest_t *sforest,
220 diagFactBufs_type<Ftype>**dFBufs, // size maxEtree level
221 gEtreeInfo_t *gEtreeInfo, // global etree info
222 int tag_ub)
223{
224 int_t nnodes = sforest->nNodes; // number of nodes in the tree
225 if (nnodes < 1)
226 {
227 return 1;
228 }
229
230#if (DEBUGlevel >= 1)
231 CHECK_MALLOC(grid3d->iam, "Enter dsparseTreeFactor_ASYNC()");
232#endif
233
234 int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
235 treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
236 int_t *myIperm = treeTopoInfo->myIperm;
237 int_t maxTopoLevel = treeTopoInfo->numLvl;
238 int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;
239
240 /*main loop over all the levels*/
241 int_t numLA = getNumLookAhead(options);
242
243 for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
244 {
245 /* code */
246 int_t k_st = eTreeTopLims[topoLvl];
247 int_t k_end = eTreeTopLims[topoLvl + 1];
248 for (int_t k0 = k_st; k0 < k_end; ++k0)
249 {
250 int_t k = perm_c_supno[k0];
251 int_t offset = k0 - k_st;
252 int_t ksupc = SuperSize(k);
253 /*======= Diagonal Factorization ======*/
254 if (iam == procIJ(k, k))
255 {
256 lPanelVec[g2lCol(k)].diagFactor(k, dFBufs[offset]->BlockUFactor, ksupc,
257 thresh, xsup, options, stat, info);
258 lPanelVec[g2lCol(k)].packDiagBlock(dFBufs[offset]->BlockLFactor, ksupc);
259 }
260
261 /*======= Diagonal Broadcast ======*/
262 if (myrow == krow(k))
263 MPI_Bcast((void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
264 MPI_DOUBLE, kcol(k), (grid->rscp).comm);
265 if (mycol == kcol(k))
266 MPI_Bcast((void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
267 MPI_DOUBLE, krow(k), (grid->cscp).comm);
268
269 /*======= Panel Update ======*/
270 if (myrow == krow(k))
271 uPanelVec[g2lRow(k)].panelSolve(ksupc, dFBufs[offset]->BlockLFactor, ksupc);
272
273 if (mycol == kcol(k))
274 lPanelVec[g2lCol(k)].panelSolve(ksupc, dFBufs[offset]->BlockUFactor, ksupc);
275
276 /*======= Panel Broadcast ======*/
277 xupanel_t<Ftype> k_upanel(UidxRecvBufs[0], UvalRecvBufs[0]) ;
278 xlpanel_t<Ftype> k_lpanel(LidxRecvBufs[0], LvalRecvBufs[0]);
279 if (myrow == krow(k))
280 {
281 k_upanel= uPanelVec[g2lRow(k)];
282 }
283 if (mycol == kcol(k))
284 k_lpanel = lPanelVec[g2lCol(k)];
285
286 if(UidxSendCounts[k]>0)
287 {
288 MPI_Bcast(k_upanel.index, UidxSendCounts[k], mpi_int_t, krow(k), grid3d->cscp.comm);
289 MPI_Bcast(k_upanel.val, UvalSendCounts[k], MPI_DOUBLE, krow(k), grid3d->cscp.comm);
290 }
291
292 if(LidxSendCounts[k]>0)
293 {
294 MPI_Bcast(k_lpanel.index, LidxSendCounts[k], mpi_int_t, kcol(k), grid3d->rscp.comm);
295 MPI_Bcast(k_lpanel.val, LvalSendCounts[k], MPI_DOUBLE, kcol(k), grid3d->rscp.comm);
296 }
297
298
299 /*======= Schurcomplement Update ======*/
300 #warning single node only
301 // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], uPanelVec[g2lRow(k)]);
302 // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], k_upanel);
303 if(UidxSendCounts[k]>0 && LidxSendCounts[k]>0)
304 {
305 k_upanel.checkCorrectness();
306 dSchurComplementUpdate(k, k_lpanel, k_upanel);
307
308 }
309 // MPI_Barrier(grid3d->comm);
310
311 } /*for k0= k_st:k_end */
312
313 } /*for topoLvl = 0:maxTopoLevel*/
314
315
316
317#if (DEBUGlevel >= 1)
318 CHECK_MALLOC(grid3d->iam, "Exit dsparseTreeFactor_ASYNC()");
319#endif
320
321 return 0;
322} /* dsparseTreeFactor_ASYNC */
Definition: lupanels.hpp:19
Definition: lupanels.hpp:172
Definition: xlupanels.hpp:22
Definition: xlupanels.hpp:176
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
Definition: superlu_defs.h:978
int_t * setree
Definition: superlu_defs.h:979
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
int_t * nodeList
Definition: superlu_defs.h:992
int_t nNodes
Definition: superlu_defs.h:991
Definition: superlu_defs.h:970
int_t * myIperm
Definition: superlu_defs.h:973
int_t numLvl
Definition: superlu_defs.h:971
int_t * eTreeTopLims
Definition: superlu_defs.h:972
int_t dsparseTreeFactor(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:8
int_t dsparseTreeFactorBaseline(sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked_impl.hpp:220
Distributed SuperLU data types and function prototypes.
int_t * intMalloc_dist(int_t)
Definition: memory.c:210
#define SuperSize(bnum)
Definition: superlu_defs.h:271
#define mpi_int_t
Definition: superlu_defs.h:120
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int64_t int_t
Definition: superlu_defs.h:119
int i
Definition: sutil_dist.c:287
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56
#define SUPERLU_MIN(x, y)
Definition: util_dist.h:64
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54