SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
dHWPM_CombBLAS.hpp
Go to the documentation of this file.
1
22#pragma once
23#ifndef dHWPM_CombBLAS_hpp
24#define dHWPM_CombBLAS_hpp
25
26#include "CombBLAS/CombBLAS.h"
27#include "ApproxWeightPerfectMatching.h"
28#include "superlu_ddefs.h"
29
30
59void
61{
62 NRformat_loc *Astore;
63 int_t i, irow, fst_row, j, jcol, m, n, m_loc;
64 int_t lirow, ljcol;
65 int_t nnz_loc; /* number of local nonzeros */
66 double *nzval_a;
67 int iam, p, procs;
68 int_t *perm=nullptr; // placeholder for load balancing permutation for CombBLAS
69 procs = grid->nprow * grid->npcol;
70
71 if(grid->nprow != grid->npcol)
72 {
73 printf("HWPM only supports square process grid. Retuning without a permutation.\n");
74 }
75 combblas::SpParMat < int_t, double, combblas::SpDCCols<int_t,double> > Adcsc(grid->comm);
76 std::vector< std::vector < std::tuple<int_t,int_t,double> > > data(procs);
77
78 /* ------------------------------------------------------------
79 INITIALIZATION.
80 ------------------------------------------------------------*/
81 iam = grid->iam;
82#if ( DEBUGlevel>=1 )
83 CHECK_MALLOC(iam, "Enter pdCSR_loc_to_2DBlock()");
84#endif
85 Astore = (NRformat_loc *) A->Store;
86 n = A->ncol;
87 m = A->nrow;
88 m_loc = Astore->m_loc;
89 fst_row = Astore->fst_row;
90
91 /* ------------------------------------------------------------
92 FIRST PASS OF A:
93 COUNT THE NUMBER OF NONZEROS TO BE SENT TO EACH PROCESS,
94 THEN ALLOCATE SPACE.
95 Re-distribute A from distributed CSR storage to 2D block storage
96 conforming CombBLAS API.
97 ------------------------------------------------------------*/
98 nzval_a = (double *) Astore->nzval;
99 nnz_loc = 0;
100 for (i = 0; i < m_loc; ++i) {
101 for (j = Astore->rowptr[i]; j < Astore->rowptr[i+1]; ++j) {
102 if(perm != NULL)
103 {
104 irow = perm[i+fst_row]; /* Row number in P*A*P^T */
105 jcol = perm[Astore->colind[j]]; /* Column number in P*A*P^T */
106 }
107 else
108 {
109 irow = i+fst_row;
110 jcol = Astore->colind[j];
111 }
112 p = Adcsc.Owner(m, n , irow, jcol, lirow, ljcol);
113 ++ nnz_loc;
114 data[p].push_back(std::make_tuple(lirow,ljcol,nzval_a[j]));
115
116 }
117 }
118
119 Adcsc.SparseCommon(data, nnz_loc, m, n, std::plus<double>());
120 combblas::FullyDistVec<int_t, int_t> mateRow2Col ( Adcsc.getcommgrid(), m, (int_t) -1);
121 combblas::FullyDistVec<int_t, int_t> mateCol2Row ( Adcsc.getcommgrid(), n, (int_t) -1);
122 combblas::AWPM(Adcsc, mateRow2Col, mateCol2Row,true);
123
124 // now gather the matching vector
125 MPI_Comm World = mateRow2Col.getcommgrid()->GetWorld();
126 int * rdispls = new int[procs];
127 int sendcnt = mateRow2Col.LocArrSize();
128 int * recvcnt = new int[procs];
129 MPI_Allgather(&sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World);
130 rdispls[0] = 0;
131 for(int i=0; i<procs-1; ++i)
132 {
133 rdispls[i+1] = rdispls[i] + recvcnt[i];
134 }
135 int_t *senddata = (int_t *)mateRow2Col.GetLocArr();
136
137 MPI_Allgatherv(senddata, sendcnt, combblas::MPIType<int_t>(), ScalePermstruct->perm_r, recvcnt, rdispls, combblas::MPIType<int_t>(), World);
138
139 delete[] rdispls;
140 delete[] recvcnt;
141
142#if ( DEBUGlevel>=1 )
143 CHECK_MALLOC(iam, "Exit dHWPM_CombBLAS()");
144#endif
145}
146
147#endif /* dHWPM_CombBLAS_hpp */
int j
Definition: dutil_dist.c:248
int i
Definition: dutil_dist.c:248
int int_t
Definition: superlu_defs.h:114
#define CHECK_MALLOC(pnum, where)
Definition: util_dist.h:50
void dGetHWPM(SuperMatrix *A, gridinfo_t *grid, dScalePermstruct_t *ScalePermstruct)
Definition: dHWPM_CombBLAS.hpp:60
Definition: supermatrix.h:176
void * nzval
Definition: supermatrix.h:180
int_t * rowptr
Definition: supermatrix.h:181
int_t * colind
Definition: supermatrix.h:183
int_t m_loc
Definition: supermatrix.h:178
int_t fst_row
Definition: supermatrix.h:179
Definition: supermatrix.h:54
void * Store
Definition: supermatrix.h:62
int_t nrow
Definition: supermatrix.h:60
int_t ncol
Definition: supermatrix.h:61
Definition: superlu_ddefs.h:76
int_t * perm_r
Definition: superlu_ddefs.h:80
Definition: superlu_defs.h:388
int_t nprow
Definition: superlu_defs.h:393
int_t npcol
Definition: superlu_defs.h:394
MPI_Comm comm
Definition: superlu_defs.h:389
int iam
Definition: superlu_defs.h:392
Distributed SuperLU data types and function prototypes.