SuperLU Distributed 9.0.0
gpu3d
anc25d_impl.hpp
Go to the documentation of this file.
1#pragma once
2#include "lupanels.hpp"
3#include "anc25d.hpp"
5
6// Sherry: moved from anc25d.cpp to here
7MPI_Comm *anc25d_t::initComm(gridinfo3d_t *grid3d)
8{
9 int maxLvl = log2i(grid3d->zscp.Np) + 1;
10 int myGrid = grid3d->zscp.Iam;
11 MPI_Comm *zCommOut = (MPI_Comm *)SUPERLU_MALLOC((maxLvl - 1) * sizeof(MPI_Comm));
12 MPI_Comm zComm = grid3d->zscp.comm;
13 for (int_t alvl = 0; alvl < maxLvl - 1; ++alvl)
14 {
15 int lvlCommSize = 1 << (alvl + 1);
16 int lvlCommBase = (myGrid / lvlCommSize) * lvlCommSize;
17 int lvlCommRank = myGrid - lvlCommBase;
18 MPI_Comm lvlComm;
19 MPI_Comm_split(zComm, lvlCommBase, lvlCommRank, &lvlComm);
20 zCommOut[alvl] = lvlComm;
21 }
22
23 return zCommOut;
24}
25
26
27// Baseline ancestor-level factorization for the 2.5D scheme (stale comment fixed)
// Baseline "ancestor" factorization of the 2.5D (anc25d) scheme, run at
// ancestor level `alvl` over the supernodal forest `sforest`.
//
// Per supernode k, taken in sforest->nodeList order one etree topology level
// at a time:
//   1. the L and U panel values are summed with MPI_Reduce onto the level's
//      root rank (anc25d.rootRank / anc25d.getComm);
//   2. the rank(s) holding the grid for k0 factor the diagonal block,
//      broadcast the diagonal factors along the 2D row/column scopes, solve
//      the L/U panels, broadcast the panels, and apply the Schur-complement
//      update;
//   3. the updated panel values are broadcast back from the root over the
//      same level communicator.
//
// Returns 1 immediately when the forest is empty, otherwise 0.
//
// NOTE(review): the doxygen listing this was scraped from dropped original
// line 29, which carried the function head; the file's index identifies it as
//   int_t dAncestorFactorBaseline(int_t alvl, sForest_t *sforest,
//       diagFactBufs_type<Ftype> **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
// and the unqualified uses of lPanelVec/uPanelVec/grid/iam below imply it is
// a member function of the LU-panels structure -- confirm against the header.
template <typename Ftype>
30 int_t alvl,
31 sForest_t *sforest,
32 diagFactBufs_type<Ftype> **dFBufs, // size maxEtree level
33 gEtreeInfo_t *gEtreeInfo, // global etree info; NOTE(review): not referenced in the visible body
34 int tag_ub)
35{
36 int_t nnodes = sforest->nNodes; // number of nodes in the tree
// Empty forest: nothing to factor. Note the early return value is 1, not 0.
37 if (nnodes < 1)
38 {
39 return 1;
40 }
41
// NOTE(review): the CHECK_MALLOC tag says dAncestorFactor_ASYNC, but this is
// the Baseline variant -- the tag looks copied from the async routine.
42#if (DEBUGlevel >= 1)
43 CHECK_MALLOC(grid3d->iam, "Enter dAncestorFactor_ASYNC()");
44#endif
45
46 int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
47 treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
48 // int_t *myIperm = treeTopoInfo->myIperm;
49 int_t maxTopoLevel = treeTopoInfo->numLvl;
50 int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;
51
52 /*main loop over all the levels*/
// NOTE(review): numLA is computed but never used below; look-ahead is
// effectively disabled in this baseline path.
53 int_t numLA = getNumLookAhead(options);
54
55 for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
56 {
57 /* code */
// [k_st, k_end) delimits the supernodes of this topology level.
58 int_t k_st = eTreeTopLims[topoLvl];
59 int_t k_end = eTreeTopLims[topoLvl + 1];
60 for (int_t k0 = k_st; k0 < k_end; ++k0)
61 {
62 int_t k = perm_c_supno[k0];
63 int kRoot = anc25d.rootRank(k0, alvl);
64 // reduce the l and u panels to the root with MPI_Comm = anc25d.getComm(alvl);
// MPI_Reduce permits MPI_IN_PLACE only at the root, so rankHasGrid(k0, alvl)
// is expected to hold exactly on kRoot here -- confirm in anc25d_t.
65 if (mycol == kcol(k))
66 {
67 void *sendBuf = (void *)lPanelVec[g2lCol(k)].val;
68 if (anc25d.rankHasGrid(k0, alvl))
69 sendBuf = MPI_IN_PLACE;
70
71 MPI_Reduce(sendBuf, lPanelVec[g2lCol(k)].val,
72 lPanelVec[g2lCol(k)].nzvalSize(), get_mpi_type<Ftype>(), MPI_SUM, kRoot, anc25d.getComm(alvl));
73 }
74
75 if (myrow == krow(k))
76 {
77 void *sendBuf = (void *)uPanelVec[g2lRow(k)].val;
78 if (anc25d.rankHasGrid(k0, alvl))
79 sendBuf = MPI_IN_PLACE;
80 MPI_Reduce(sendBuf, uPanelVec[g2lRow(k)].val,
81 uPanelVec[g2lRow(k)].nzvalSize(), get_mpi_type<Ftype>(), MPI_SUM, kRoot, anc25d.getComm(alvl));
82 }
83
// Only the grid owning k0 at this ancestor level performs the actual
// factorization steps; the other grids just participated in the reduction
// above and wait for the broadcast below.
84 if (anc25d.rankHasGrid(k0, alvl))
85 {
86
87 int_t offset = k0 - k_st;
88 int_t ksupc = SuperSize(k);
89 /*======= Diagonal Factorization ======*/
90 if (iam == procIJ(k, k))
91 {
// Factor the diagonal block of supernode k into dFBufs[offset]->BlockUFactor,
// then pack the L diagonal block for the row/column broadcasts below.
92 lPanelVec[g2lCol(k)].diagFactor(k, dFBufs[offset]->BlockUFactor, ksupc,
93 thresh, xsup, options, stat, info);
94 lPanelVec[g2lCol(k)].packDiagBlock(dFBufs[offset]->BlockLFactor, ksupc);
95 }
96
97 /*======= Diagonal Broadcast ======*/
// L factor travels along the process row (root = owner column kcol(k));
// U factor travels along the process column (root = owner row krow(k)).
98 if (myrow == krow(k))
99 MPI_Bcast((void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
100 get_mpi_type<Ftype>(), kcol(k), (grid->rscp).comm);
101 if (mycol == kcol(k))
102 MPI_Bcast((void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
103 get_mpi_type<Ftype>(), krow(k), (grid->cscp).comm);
104
105 /*======= Panel Update ======*/
// Triangular solves: U panel against BlockLFactor, L panel against
// BlockUFactor.
106 if (myrow == krow(k))
107 uPanelVec[g2lRow(k)].panelSolve(ksupc, dFBufs[offset]->BlockLFactor, ksupc);
108
109 if (mycol == kcol(k))
110 lPanelVec[g2lCol(k)].panelSolve(ksupc, dFBufs[offset]->BlockUFactor, ksupc);
111
112 /*======= Panel Broadcast ======*/
// Non-owners receive into the shared recv buffers; owners alias their own
// panel so the broadcast sends from it in place.
113 xupanel_t<Ftype> k_upanel(UidxRecvBufs[0], UvalRecvBufs[0]);
114 xlpanel_t<Ftype> k_lpanel(LidxRecvBufs[0], LvalRecvBufs[0]);
115 if (myrow == krow(k))
116 {
117 k_upanel = uPanelVec[g2lRow(k)];
118 }
119 if (mycol == kcol(k))
120 k_lpanel = lPanelVec[g2lCol(k)];
121
122 if (UidxSendCounts[k] > 0)
123 {
124 MPI_Bcast(k_upanel.index, UidxSendCounts[k], mpi_int_t, krow(k), grid3d->cscp.comm);
125 MPI_Bcast(k_upanel.val, UvalSendCounts[k], get_mpi_type<Ftype>(), krow(k), grid3d->cscp.comm);
126 }
127
128 if (LidxSendCounts[k] > 0)
129 {
130 MPI_Bcast(k_lpanel.index, LidxSendCounts[k], mpi_int_t, kcol(k), grid3d->rscp.comm);
131 MPI_Bcast(k_lpanel.val, LvalSendCounts[k], get_mpi_type<Ftype>(), kcol(k), grid3d->rscp.comm);
132 }
133
134/*======= Schurcomplement Update ======*/
135#warning single node only
136 // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], uPanelVec[g2lRow(k)]);
137 // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], k_upanel);
// Update only happens when both panels are non-empty for this supernode.
138 if (UidxSendCounts[k] > 0 && LidxSendCounts[k] > 0)
139 {
140 k_upanel.checkCorrectness();
141 dSchurComplementUpdate(k, k_lpanel, k_upanel);
142 }
143 }
// Broadcast the updated l and u panels FROM the root (kRoot is the MPI_Bcast
// root) with MPI_Comm = anc25d.getComm(alvl); every grid in the level group,
// owner or not, participates so all copies end up consistent.
145 // Broadcast the l and u panels from the root with MPI_Comm = anc25d.getComm(alvl);
146 if (mycol == kcol(k))
147 MPI_Bcast(lPanelVec[g2lCol(k)].val,
148 lPanelVec[g2lCol(k)].nzvalSize(), get_mpi_type<Ftype>(), kRoot, anc25d.getComm(alvl));
149
150 if (myrow == krow(k))
151 MPI_Bcast(uPanelVec[g2lRow(k)].val,
152 uPanelVec[g2lRow(k)].nzvalSize(), get_mpi_type<Ftype>(), kRoot, anc25d.getComm(alvl));
153 // MPI_Barrier(grid3d->comm);
154
155 } /*for k0= k_st:k_end */
156
157 } /*for topoLvl = 0:maxTopoLevel*/
158
159#if (DEBUGlevel >= 1)
160 CHECK_MALLOC(grid3d->iam, "Exit dAncestorFactor_ASYNC()");
161#endif
162
163 return 0;
164} /* dAncestorFactor_ASYNC */
Definition: xlupanels.hpp:22
int_t * index
Definition: xlupanels.hpp:24
Ftype * val
Definition: xlupanels.hpp:25
Definition: xlupanels.hpp:176
int_t * index
Definition: xlupanels.hpp:178
int_t checkCorrectness()
Definition: xlupanels.hpp:276
Ftype * val
Definition: xlupanels.hpp:179
typename std::conditional< std::is_same< Ftype, double >::value, ddiagFactBufs_t, typename std::conditional< std::is_same< Ftype, float >::value, sdiagFactBufs_t, typename std::conditional< std::is_same< Ftype, doublecomplex >::value, zdiagFactBufs_t, void >::type >::type >::type diagFactBufs_type
Definition: luAuxStructTemplated.hpp:147
MPI_Comm * initComm(gridinfo3d_t *grid3d)
Definition: anc25d.cpp:5
int maxLvl
Definition: anc25d.hpp:16
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
superlu_scope_t zscp
Definition: superlu_defs.h:418
superlu_scope_t rscp
Definition: superlu_defs.h:416
int iam
Definition: superlu_defs.h:420
superlu_scope_t cscp
Definition: superlu_defs.h:417
Definition: superlu_defs.h:989
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:999
int_t * nodeList
Definition: superlu_defs.h:992
int_t nNodes
Definition: superlu_defs.h:991
int Np
Definition: superlu_defs.h:399
MPI_Comm comm
Definition: superlu_defs.h:398
int Iam
Definition: superlu_defs.h:400
Definition: superlu_defs.h:970
int_t numLvl
Definition: superlu_defs.h:971
int_t * eTreeTopLims
Definition: superlu_defs.h:972
int_t dAncestorFactorBaseline(int_t alvl, sForest_t *sforest, diagFactBufs_type< Ftype > **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: anc25d_impl.hpp:29
int_t log2i(int_t index)
Definition: supernodal_etree.c:17
#define SuperSize(bnum)
Definition: superlu_defs.h:271
#define mpi_int_t
Definition: superlu_defs.h:120
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:186
int64_t int_t
Definition: superlu_defs.h:119
#define SUPERLU_MALLOC(size)
Definition: util_dist.h:48
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:56