28 #ifndef DISTRIBUTED_MATRIX_HPP
29 #define DISTRIBUTED_MATRIX_HPP
37 #include "misc/RandomWrapper.hpp"
43 inline int indxl2g(
int INDXLOC,
int NB,
int IPROC,
int ISRCPROC,
int NPROCS)
44 {
return NPROCS*NB*((INDXLOC-1)/NB) + (INDXLOC-1) % NB +
45 ((NPROCS+IPROC-ISRCPROC) % NPROCS)*NB + 1; }
46 inline int indxg2l(
int INDXGLOB,
int NB,
int IPROC,
int ISRCPROC,
int NPROCS)
47 {
return NB*((INDXGLOB-1)/(NB*NPROCS)) + (INDXGLOB-1) % NB + 1; }
48 inline int indxg2p(
int INDXGLOB,
int NB,
int IPROC,
int ISRCPROC,
int NPROCS)
49 {
return ( ISRCPROC + (INDXGLOB - 1) / NB ) % NPROCS; }
53 using real_t =
typename RealType<scalar_t>::value_type;
57 scalar_t* data_ =
nullptr;
66 const std::function<scalar_t(std::size_t,
87 const int* desc()
const {
return desc_; }
88 int* desc() {
return desc_; }
89 bool active()
const {
return grid() && grid()->
active(); }
91 const BLACSGrid* grid()
const {
return grid_; }
92 const MPIComm& Comm()
const {
return grid()->
Comm(); }
93 MPI_Comm comm()
const {
return Comm().
comm(); }
95 int ctxt()
const {
return grid() ? grid()->
ctxt() : -1; }
96 int ctxt_all()
const {
return grid() ? grid()->
ctxt_all() : -1; }
98 virtual int rows()
const {
return desc_[2]; }
99 virtual int cols()
const {
return desc_[3]; }
100 int lrows()
const {
return lrows_; }
101 int lcols()
const {
return lcols_; }
102 int ld()
const {
return lrows_; }
103 int MB()
const {
return desc_[4]; }
104 int NB()
const {
return desc_[5]; }
105 int rowblocks()
const {
return std::ceil(
float(lrows()) / MB()); }
106 int colblocks()
const {
return std::ceil(
float(lcols()) / NB()); }
108 virtual int I()
const {
return 1; }
109 virtual int J()
const {
return 1; }
110 virtual void lranges(
int& rlo,
int& rhi,
int& clo,
int& chi)
const;
112 const scalar_t* data()
const {
return data_; }
113 scalar_t* data() {
return data_; }
114 const scalar_t& operator()(
int r,
int c)
const
115 {
return data_[r+ld()*c]; }
116 scalar_t& operator()(
int r,
int c) {
return data_[r+ld()*c]; }
118 int prow()
const { assert(grid());
return grid()->
prow(); }
119 int pcol()
const { assert(grid());
return grid()->
pcol(); }
120 int nprows()
const { assert(grid());
return grid()->
nprows(); }
121 int npcols()
const { assert(grid());
return grid()->
npcols(); }
122 int npactives()
const { assert(grid());
return grid()->
npactives(); }
124 bool is_master()
const {
return grid() && prow() == 0 && pcol() == 0; }
125 int rowl2g(
int row)
const { assert(grid());
126 return indxl2g(row+1, MB(), prow(), 0, nprows()) - I(); }
127 int coll2g(
int col)
const { assert(grid());
128 return indxl2g(col+1, NB(), pcol(), 0, npcols()) - J(); }
129 int rowg2l(
int row)
const { assert(grid());
130 return indxg2l(row+I(), MB(), prow(), 0, nprows()) - 1; }
131 int colg2l(
int col)
const { assert(grid());
132 return indxg2l(col+J(), NB(), pcol(), 0, npcols()) - 1; }
133 int rowg2p(
int row)
const { assert(grid());
134 return indxg2p(row+I(), MB(), prow(), 0, nprows()); }
135 int colg2p(
int col)
const { assert(grid());
136 return indxg2p(col+J(), NB(), pcol(), 0, npcols()); }
137 int rank(
int r,
int c)
const {
138 return rowg2p(r) + colg2p(c) * nprows(); }
139 bool is_local(
int r,
int c)
const { assert(grid());
140 return rowg2p(r) == prow() && colg2p(c) == pcol();
143 bool fixed()
const {
return MB()==default_MB && NB()==default_NB; }
144 int rowl2g_fixed(
int row)
const {
145 assert(grid() && fixed());
146 return indxl2g(row+1, default_MB, prow(), 0, nprows()) - I(); }
147 int coll2g_fixed(
int col)
const {
148 assert(grid() && fixed());
149 return indxl2g(col+1, default_NB, pcol(), 0, npcols()) - J(); }
150 int rowg2l_fixed(
int row)
const {
151 assert(grid() && fixed());
152 return indxg2l(row+I(), default_MB, prow(), 0, nprows()) - 1; }
153 int colg2l_fixed(
int col)
const {
154 assert(grid() && fixed());
155 return indxg2l(col+J(), default_NB, pcol(), 0, npcols()) - 1; }
156 int rowg2p_fixed(
int row)
const {
157 assert(grid() && fixed());
158 return indxg2p(row+I(), default_MB, prow(), 0, nprows()); }
159 int colg2p_fixed(
int col)
const {
160 assert(grid() && fixed());
161 return indxg2p(col+J(), default_NB, pcol(), 0, npcols()); }
162 int rank_fixed(
int r,
int c)
const {
163 assert(grid() && fixed());
return rowg2p_fixed(r) + colg2p_fixed(c) * nprows(); }
164 bool is_local_fixed(
int r,
int c)
const {
165 assert(grid() && fixed());
166 return rowg2p_fixed(r) == prow() && colg2p_fixed(c) == pcol(); }
169 const scalar_t& global(
int r,
int c)
const
170 { assert(is_local(r, c));
return operator()(rowg2l(r),colg2l(c)); }
171 scalar_t& global(
int r,
int c)
172 { assert(is_local(r, c));
return operator()(rowg2l(r),colg2l(c)); }
173 scalar_t& global_fixed(
int r,
int c) {
174 assert(is_local(r, c)); assert(fixed());
175 return operator()(rowg2l_fixed(r),colg2l_fixed(c)); }
176 void global(
int r,
int c, scalar_t v) {
177 if (active() && is_local(r, c)) operator()(rowg2l(r),colg2l(c)) = v; }
178 scalar_t all_global(
int r,
int c)
const;
180 void print()
const { print(
"A"); }
181 void print(std::string name,
int precision=15)
const;
182 void print_to_file(std::string name, std::string filename,
184 void print_to_files(std::string name,
int precision=16)
const;
189 void fill(scalar_t a);
190 void fill(
const std::function<scalar_t(std::size_t,
193 void shift(scalar_t sigma);
195 virtual void resize(std::size_t m, std::size_t n);
202 void laswp(
const std::vector<int>& P,
bool fwd);
205 extract_rows(
const std::vector<std::size_t>& Ir)
const;
207 extract_cols(
const std::vector<std::size_t>& Ic)
const;
210 extract(
const std::vector<std::size_t>& I,
211 const std::vector<std::size_t>& J)
const;
219 real_t normF()
const;
220 real_t norm1()
const;
221 real_t normI()
const;
223 virtual std::size_t memory()
const
224 {
return sizeof(scalar_t)*std::size_t(lrows())*std::size_t(lcols()); }
225 virtual std::size_t total_memory()
const
226 {
return sizeof(scalar_t)*std::size_t(rows())*std::size_t(cols()); }
227 virtual std::size_t nonzeros()
const
228 {
return std::size_t(lrows())*std::size_t(lcols()); }
229 virtual std::size_t total_nonzeros()
const
230 {
return std::size_t(rows())*std::size_t(cols()); }
240 std::vector<int> LU();
241 int LU(std::vector<int>&);
245 const std::vector<int>& piv)
const;
250 void orthogonalize(scalar_t& r_max, scalar_t& r_min);
253 std::vector<std::size_t>& ind,
254 real_t rel_tol, real_t abs_tol,
int max_rank);
256 std::vector<std::size_t>& ind, real_t rel_tol, real_t abs_tol,
259 static const int default_MB = STRUMPACK_PBLAS_BLOCKSIZE;
260 static const int default_NB = STRUMPACK_PBLAS_BLOCKSIZE;
267 template<
typename scalar_t>
void
270 int dest,
int context_all);
272 template<
typename scalar_t>
void
278 template<
typename scalar_t>
void
281 std::size_t ib, std::size_t jb,
int context_all);
288 template<
typename scalar_t>
291 int _rows, _cols, _i, _j;
294 _rows(0), _cols(0), _i(0), _j(0) {}
301 std::size_t i, std::size_t j);
305 int MB,
int NB, scalar_t* A);
316 int rows()
const override {
return _rows; }
317 int cols()
const override {
return _cols; }
318 int I()
const override {
return _i+1; }
319 int J()
const override {
return _j+1; }
320 void lranges(
int& rlo,
int& rhi,
int& clo,
int& chi)
const override;
322 void resize(std::size_t m, std::size_t n)
override { assert(1); }
326 std::size_t memory()
const override {
return 0; }
327 std::size_t total_memory()
const override {
return 0; }
328 std::size_t nonzeros()
const override {
return 0; }
329 std::size_t total_nonzeros()
const override {
return 0; }
340 template<
typename scalar_t>
long long int
342 if (!a.active())
return 0;
343 return (is_complex<scalar_t>() ? 4:1) *
344 blas::getrf_flops(a.rows(), a.cols()) /
348 template<
typename scalar_t>
long long int
349 solve_flops(
const DistributedMatrix<scalar_t>& b) {
350 if (!b.active())
return 0;
351 return (is_complex<scalar_t>() ? 4:1) *
352 blas::getrs_flops(b.rows(), b.cols()) /
356 template<
typename scalar_t>
long long int
357 LQ_flops(
const DistributedMatrix<scalar_t>& a) {
358 if (!a.active())
return 0;
359 auto minrc = std::min(a.rows(), a.cols());
360 return (is_complex<scalar_t>() ? 4:1) *
361 (blas::gelqf_flops(a.rows(), a.cols()) +
362 blas::xxglq_flops(a.cols(), a.cols(), minrc)) /
366 template<
typename scalar_t>
long long int
367 ID_row_flops(
const DistributedMatrix<scalar_t>& a,
int rank) {
368 if (!a.active())
return 0;
369 return (is_complex<scalar_t>() ? 4:1) *
370 (blas::geqp3_flops(a.cols(), a.rows())
375 template<
typename scalar_t>
long long int
376 trsm_flops(
Side s, scalar_t alpha,
const DistributedMatrix<scalar_t>& a,
377 const DistributedMatrix<scalar_t>& b) {
378 if (!a.active())
return 0;
379 return (is_complex<scalar_t>() ? 4:1) *
384 template<
typename scalar_t>
long long int
386 const DistributedMatrix<scalar_t>& a,
387 const DistributedMatrix<scalar_t>& b, scalar_t beta) {
388 if (!a.active())
return 0;
389 return (is_complex<scalar_t>() ? 4:1) *
391 ((ta==
Trans::N) ? a.rows() : a.cols(),
392 (tb==
Trans::N) ? b.cols() : b.rows(),
393 (ta==
Trans::N) ? a.cols() : a.rows(), alpha, beta) /
397 template<
typename scalar_t>
long long int
398 gemv_flops(
Trans ta,
const DistributedMatrix<scalar_t>& a,
399 scalar_t alpha, scalar_t beta) {
400 if (!a.active())
return 0;
401 auto m = (ta==
Trans::N) ? a.rows() : a.cols();
402 auto n = (ta==
Trans::N) ? a.cols() : a.rows();
403 return (is_complex<scalar_t>() ? 4:1) *
404 ((alpha != scalar_t(0.)) * m * (n * 2 - 1) +
405 (alpha != scalar_t(1.) && alpha != scalar_t(0.)) * m +
406 (beta != scalar_t(0.) && beta != scalar_t(1.)) * m +
407 (alpha != scalar_t(0.) && beta != scalar_t(0.)) * m) /
411 template<
typename scalar_t>
long long int
413 if (!a.active())
return 0;
414 auto minrc = std::min(a.rows(), a.cols());
415 return (is_complex<scalar_t>() ? 4:1) *
416 (blas::geqrf_flops(a.rows(), minrc) +
417 blas::xxgqr_flops(a.rows(), minrc, minrc)) /
422 template<
typename scalar_t>
423 std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
424 ConstDistributedMatrixWrapperPtr
425 (std::size_t m, std::size_t n,
const DistributedMatrix<scalar_t>& D,
426 std::size_t i, std::size_t j) {
427 return std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
428 (
new DistributedMatrixWrapper<scalar_t>
429 (m, n,
const_cast<DistributedMatrix<scalar_t>&
>(D), i, j));
433 template<
typename scalar_t>
void gemm
434 (
Trans ta,
Trans tb, scalar_t alpha,
const DistributedMatrix<scalar_t>& A,
435 const DistributedMatrix<scalar_t>& B, scalar_t beta,
436 DistributedMatrix<scalar_t>& C);
438 template<
typename scalar_t>
void trsm
440 const DistributedMatrix<scalar_t>& A, DistributedMatrix<scalar_t>& B);
442 template<
typename scalar_t>
void trsv
443 (
UpLo ul,
Trans ta,
Diag d,
const DistributedMatrix<scalar_t>& A,
444 DistributedMatrix<scalar_t>& B);
446 template<
typename scalar_t>
void gemv
447 (
Trans ta, scalar_t alpha,
const DistributedMatrix<scalar_t>& A,
448 const DistributedMatrix<scalar_t>& X, scalar_t beta,
449 DistributedMatrix<scalar_t>& Y);
451 template<
typename scalar_t> DistributedMatrix<scalar_t>
vconcat
452 (
int cols,
int arows,
int brows,
const DistributedMatrix<scalar_t>& a,
453 const DistributedMatrix<scalar_t>& b,
const BLACSGrid* gnew,
int cxt_all);
455 template<
typename scalar_t>
void subgrid_copy_to_buffers
456 (
const DistributedMatrix<scalar_t>& a,
const DistributedMatrix<scalar_t>& b,
457 int p0,
int npr,
int npc, std::vector<std::vector<scalar_t>>& sbuf);
459 template<
typename scalar_t>
void subproc_copy_to_buffers
460 (
const DenseMatrix<scalar_t>& a,
const DistributedMatrix<scalar_t>& b,
461 int p0,
int npr,
int npc, std::vector<std::vector<scalar_t>>& sbuf);
463 template<
typename scalar_t>
void subgrid_add_from_buffers
464 (
const BLACSGrid* subg,
int master, DistributedMatrix<scalar_t>& b,
465 std::vector<scalar_t*>& pbuf);
Contains a wrapper class around a BLACS grid/context.
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
Contains some simple C++ MPI wrapper utilities.
This is a small wrapper class around a BLACS grid and a BLACS context.
Definition: BLACSGrid.hpp:66
bool active() const
Definition: BLACSGrid.hpp:264
int npcols() const
Definition: BLACSGrid.hpp:232
int ctxt_all() const
Definition: BLACSGrid.hpp:220
int npactives() const
Definition: BLACSGrid.hpp:257
int pcol() const
Definition: BLACSGrid.hpp:244
const MPIComm & Comm() const
Definition: BLACSGrid.hpp:196
int nprows() const
Definition: BLACSGrid.hpp:226
int ctxt() const
Definition: BLACSGrid.hpp:209
int prow() const
Definition: BLACSGrid.hpp:238
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:289
Definition: DistributedMatrix.hpp:52
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:194
MPI_Comm comm() const
Definition: MPIWrapper.hpp:261
class to wrap the C++11 random number generator/distribution
Definition: RandomWrapper.hpp:105
Definition: StrumpackOptions.hpp:42
long long int LU_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1452
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
Definition: DenseMatrix.hpp:1491
UpLo
Definition: DenseMatrix.hpp:83
DenseMatrix< scalar_t > vconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1280
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
long long int orthogonalize_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1513
long long int trsm_flops(Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1483
Side
Definition: DenseMatrix.hpp:74
void copy(std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
Definition: DenseMatrix.hpp:1231
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
long long int ID_row_flops(const DenseMatrix< scalar_t > &a, int rank)
Definition: DenseMatrix.hpp:1475
long long int LQ_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1466
long long int solve_flops(const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1459
Diag
Definition: DenseMatrix.hpp:92