SuperLU Distributed 9.0.0
gpu3d
dAncestorFactor_impl.hpp
Go to the documentation of this file.
1#pragma once
2#include "superlu_ddefs.h"
3#include "lupanels.hpp"
4
5template <typename Ftype>
7 int_t ilvl,
8 sForest_t *sforest,
9 diagFactBufs_type<Ftype> **dFBufs, // size maxEtree level
10 gEtreeInfo_t *gEtreeInfo, // global etree info
11 int tag_ub)
12{
13
14 int_t nnodes = sforest->nNodes; // number of nodes in the tree
15 if (nnodes < 1)
16 {
17 return 1;
18 }
19
20#if (DEBUGlevel >= 1)
21 CHECK_MALLOC(grid3d->iam, "Enter dAncestorFactor_ASYNC()");
22#endif
23
24 int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
25 treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
26 int_t *myIperm = treeTopoInfo->myIperm;
27 int_t maxTopoLevel = treeTopoInfo->numLvl;
28 int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;
29
30 /*main loop over all the levels*/
31 int_t numLA = getNumLookAhead(options);
32
33 int_t *donePanelBcast = intMalloc_dist(nnodes);
34 int_t *donePanelSolve = intMalloc_dist(nnodes);
35 int_t *localNumChildrenLeft = intMalloc_dist(nnodes);
36
37 // TODO: not needed, remove after testing
38 for (int_t i = 0; i < nnodes; i++)
39 {
40 donePanelBcast[i] = 0;
41 donePanelSolve[i] = 0;
42 localNumChildrenLeft[i] = 0;
43 }
44
45 for (int_t k0 = 0; k0 < nnodes; k0++)
46 {
47 int_t k = perm_c_supno[k0];
48 int_t k_parent = gEtreeInfo->setree[k];
49 int_t ik = myIperm[k_parent];
50 if (ik > -1 && ik < nnodes)
51 localNumChildrenLeft[ik]++;
52 }
53
54 // start the pipeline
55 int_t topoLvl = 0;
56 int_t k_st = eTreeTopLims[topoLvl];
57 int_t k_end = eTreeTopLims[topoLvl + 1];
58 // TODO: make this asynchronous
59 for (int_t k0 = k_st; k0 < k_end; k0++)
60 {
61 int_t k = perm_c_supno[k0];
62 int_t offset = 0;
63 dDiagFactorPanelSolve(k, offset, dFBufs);
64 donePanelSolve[k0] = 1;
65 }
66
67 // TODO: its really the panels that needs to be doubled
68 // everything else can remain as it is
69 int_t winSize = SUPERLU_MIN(numLA / 2, eTreeTopLims[1]);
70 for (int k0 = k_st; k0 < winSize; ++k0)
71 {
72 int_t k = perm_c_supno[k0];
73 int_t offset = k0 % numLA;
74 if (!donePanelBcast[k0])
75 {
76 dPanelBcast(k, offset);
77 donePanelBcast[k0] = 1;
78 }
79 } /*for (int k0 = k_st; k0 < SUPERLU_MIN(k_end, k_st + numLA); ++k0)*/
80
81 int_t k1 = 0;
82 int_t winParity = 0;
83 int_t halfWin = numLA / 2;
84 while (k1 < nnodes)
85 {
86 for (int_t k0 = k1; k0 < SUPERLU_MIN(nnodes, k1 + winSize); ++k0)
87 {
88 int_t k = perm_c_supno[k0];
89 int_t offset = (k0 - k1) % winSize;
90 if (winParity % 2)
91 offset += halfWin; //
92
93 /*======= SchurComplement Update ======*/
94 xupanel_t<Ftype> k_upanel(UidxRecvBufs[offset], UvalRecvBufs[offset]);
95 xlpanel_t<Ftype> k_lpanel(LidxRecvBufs[offset], LvalRecvBufs[offset]);
96 if (myrow == krow(k))
97 k_upanel = uPanelVec[g2lRow(k)];
98 if (mycol == kcol(k))
99 k_lpanel = lPanelVec[g2lCol(k)];
100
101 int_t k_parent = gEtreeInfo->setree[k];
102 /* Look Ahead Panel Update */
103 if (UidxSendCounts[k] > 0 && LidxSendCounts[k] > 0)
104 lookAheadUpdate(k, k_parent, k_lpanel, k_upanel);
105
106 /* Look Ahead Panel Solve */
107 if (k_parent < nsupers)
108 {
109 int_t k0_parent = myIperm[k_parent];
110 if (k0_parent > 0 && k0_parent < nnodes)
111 {
112 localNumChildrenLeft[k0_parent]--;
113 if (topoLvl < maxTopoLevel - 1 && !localNumChildrenLeft[k0_parent])
114 {
115 int_t dOffset = 0;
116 dDiagFactorPanelSolve(k_parent, dOffset, dFBufs);
117 donePanelSolve[k0_parent] = 1;
118 }
119 }
120 }
121
122 /*proceed with remaining SchurComplement update */
123 if (UidxSendCounts[k] > 0 && LidxSendCounts[k] > 0)
124 dSchurCompUpdateExcludeOne(k, k_parent, k_lpanel, k_upanel);
125 }
126
127 k1 = k1 + winSize;
128 for (int_t k0_next = k1; k0_next < SUPERLU_MIN(nnodes, k1 + winSize); ++k0_next)
129 {
130 int k_next = perm_c_supno[k0_next];
131 if (!localNumChildrenLeft[k0_next])
132 {
133 int offset_next = (k0_next - k1) % winSize;
134 if (!(winParity % 2))
135 offset_next += halfWin;
136 dPanelBcast(k_next, offset_next);
137 donePanelBcast[k0_next] = 1;
138 }
139 else
140 {
141 winSize = k0_next - k1;
142 break;
143 }
144 }
145
146 winParity++;
147 }
148
149 SUPERLU_FREE(donePanelBcast);
150 SUPERLU_FREE(donePanelSolve);
151 SUPERLU_FREE(localNumChildrenLeft);
152#if (DEBUGlevel >= 1)
153 CHECK_MALLOC(grid3d->iam, "Exit dAncestorFactor_ASYNC()");
154#endif
155}
156
Definition: xlupanels.hpp:22
Definition: xlupanels.hpp:176
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
Definition: superlu_defs.h:978
int_t * setree
Definition: superlu_defs.h:979
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
int_t * nodeList
Definition: superlu_defs.h:992
int_t nNodes
Definition: superlu_defs.h:991
Definition: superlu_defs.h:970
int_t * myIperm
Definition: superlu_defs.h:973
int_t numLvl
Definition: superlu_defs.h:971
int_t * eTreeTopLims
Definition: superlu_defs.h:972
int_t dAncestorFactor(int_t alvl, sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dAncestorFactor_impl.hpp:6
Distributed SuperLU data types and function prototypes.
int_t * intMalloc_dist(int_t)
Definition: memory.c:210
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int64_t int_t
Definition: superlu_defs.h:119
int i
Definition: sutil_dist.c:287
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56
#define SUPERLU_MIN(x, y)
Definition: util_dist.h:64
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54