SuperLU Distributed 9.0.0
gpu3d
lupanels_comm3d_impl.hpp
Go to the documentation of this file.
1#pragma once
2#include "mpi.h"
3#include "superlu_defs.h"
4#include "lupanels.hpp"
6#include "superlu_blas.hpp"
7
8template <typename Ftype>
10 int_t **treePerm)
11{
12 int_t maxLvl = log2i(grid3d->zscp.Np) + 1;
13 int_t myGrid = grid3d->zscp.Iam;
14
15 int_t sender, receiver;
16 if ((myGrid % (1 << (ilvl + 1))) == 0)
17 {
18 sender = myGrid + (1 << ilvl);
19 receiver = myGrid;
20 }
21 else
22 {
23 sender = myGrid;
24 receiver = myGrid - (1 << ilvl);
25 }
26
27 /*Reduce all the ancestors*/
28 for (int_t alvl = ilvl + 1; alvl < maxLvl; ++alvl)
29 {
30 /* code */
31 // int_t atree = myTreeIdxs[alvl];
32 int_t numNodes = myNodeCount[alvl];
33 int_t *nodeList = treePerm[alvl];
34 double treduce = SuperLU_timer_();
35
36
37 /*first setting the L blocks to zero*/
38 for (int_t node = 0; node < numNodes; ++node) /* for each block column ... */
39 {
40 int_t k0 = nodeList[node];
41
42 if (myGrid == sender)
43 {
44 zSendLPanel(k0, receiver);
45 zSendUPanel(k0, receiver);
46 }
47 else
48 {
49 Ftype alpha = one<Ftype>(); Ftype beta = one<Ftype>();
50
51 zRecvLPanel(k0, sender, alpha, beta);
52 zRecvUPanel(k0, sender, alpha, beta);
53 }
54 }
55 // return 0;
56 SCT->ancsReduce += SuperLU_timer_() - treduce;
57 }
58 return 0;
59}
60
61template <typename Ftype>
63{
64
65 if (mycol == kcol(k0))
66 {
67 int_t lk = g2lCol(k0);
68 if (!lPanelVec[lk].isEmpty())
69 {
70 MPI_Send(lPanelVec[lk].blkPtr(0), lPanelVec[lk].nzvalSize(),
71 get_mpi_type<Ftype>(), receiverGrid, k0, grid3d->zscp.comm);
72 SCT->commVolRed += lPanelVec[lk].nzvalSize() * sizeof(Ftype);
73 }
74 }
75 return 0;
76}
77
78template <typename Ftype>
79int_t xLUstruct_t<Ftype>::zRecvLPanel(int_t k0, int_t senderGrid, Ftype alpha, Ftype beta)
80{
81 if (mycol == kcol(k0))
82 {
83 int_t lk = g2lCol(k0);
84 if (!lPanelVec[lk].isEmpty())
85 {
86
87 MPI_Status status;
88 MPI_Recv(LvalRecvBufs[0], lPanelVec[lk].nzvalSize(), get_mpi_type<Ftype>(), senderGrid, k0,
89 grid3d->zscp.comm, &status);
90
91 /*reduce the updates*/
92 superlu_scal<Ftype>(lPanelVec[lk].nzvalSize(), alpha, lPanelVec[lk].blkPtr(0), 1);
93 superlu_axpy<Ftype>(lPanelVec[lk].nzvalSize(), beta, LvalRecvBufs[0], 1, lPanelVec[lk].blkPtr(0), 1);
94 }
95 }
96 return 0;
97}
98
99template <typename Ftype>
101{
102
103 if (myrow == krow(k0))
104 {
105 int_t lk = g2lRow(k0);
106 if (!uPanelVec[lk].isEmpty())
107 {
108 MPI_Send(uPanelVec[lk].blkPtr(0), uPanelVec[lk].nzvalSize(),
109 get_mpi_type<Ftype>(), receiverGrid, k0, grid3d->zscp.comm);
110 SCT->commVolRed += uPanelVec[lk].nzvalSize() * sizeof(Ftype);
111 }
112 }
113 return 0;
114}
115
116template <typename Ftype>
117int_t xLUstruct_t<Ftype>::zRecvUPanel(int_t k0, int_t senderGrid, Ftype alpha, Ftype beta)
118{
119 if (myrow == krow(k0))
120 {
121 int_t lk = g2lRow(k0);
122 if (!uPanelVec[lk].isEmpty())
123 {
124
125 MPI_Status status;
126 MPI_Recv(UvalRecvBufs[0], uPanelVec[lk].nzvalSize(), get_mpi_type<Ftype>(), senderGrid, k0,
127 grid3d->zscp.comm, &status);
128
129 /*reduce the updates*/
130 superlu_scal<Ftype>(uPanelVec[lk].nzvalSize(), alpha, uPanelVec[lk].blkPtr(0), 1);
131 superlu_axpy<Ftype>(uPanelVec[lk].nzvalSize(), beta, UvalRecvBufs[0], 1, uPanelVec[lk].blkPtr(0), 1);
132 }
133 }
134 return 0;
135}
136
137
int_t zSendLPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d_impl.hpp:62
int_t zSendUPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d_impl.hpp:100
int_t zRecvUPanel(int_t k0, int_t senderGrid, Ftype alpha, Ftype beta)
Definition: lupanels_comm3d_impl.hpp:117
int_t ancestorReduction3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm)
Definition: lupanels_comm3d_impl.hpp:9
int_t zRecvLPanel(int_t k0, int_t senderGrid, Ftype alpha, Ftype beta)
Definition: lupanels_comm3d_impl.hpp:79
Definitions which are precision-neutral.
int_t log2i(int_t index)
Definition: supernodal_etree.c:17
int64_t int_t
Definition: superlu_defs.h:119
double SuperLU_timer_()
Definition: superlu_timer.c:72