DistributedMatrix.hpp
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  */
28 #ifndef DISTRIBUTED_MATRIX_HPP
29 #define DISTRIBUTED_MATRIX_HPP
30 
31 #include <vector>
32 #include <string>
33 #include <cmath>
34 #include <functional>
35 
36 #include "misc/MPIWrapper.hpp"
37 #include "misc/RandomWrapper.hpp"
38 #include "DenseMatrix.hpp"
39 #include "BLACSGrid.hpp"
40 
41 namespace strumpack {
42 
43  inline int indxl2g(int INDXLOC, int NB, int IPROC, int ISRCPROC, int NPROCS)
44  { return NPROCS*NB*((INDXLOC-1)/NB) + (INDXLOC-1) % NB +
45  ((NPROCS+IPROC-ISRCPROC) % NPROCS)*NB + 1; }
46  inline int indxg2l(int INDXGLOB, int NB, int IPROC, int ISRCPROC, int NPROCS)
47  { return NB*((INDXGLOB-1)/(NB*NPROCS)) + (INDXGLOB-1) % NB + 1; }
48  inline int indxg2p(int INDXGLOB, int NB, int IPROC, int ISRCPROC, int NPROCS)
49  { return ( ISRCPROC + (INDXGLOB - 1) / NB ) % NPROCS; }
50 
51 
52  template<typename scalar_t> class DistributedMatrix {
53  using real_t = typename RealType<scalar_t>::value_type;
54 
55  protected:
56  const BLACSGrid* grid_ = nullptr;
57  scalar_t* data_ = nullptr;
58  int lrows_;
59  int lcols_;
60  int desc_[9];
61 
62  public:
64  DistributedMatrix(const BLACSGrid* g, int M, int N);
65  DistributedMatrix(const BLACSGrid* g, int M, int N,
66  const std::function<scalar_t(std::size_t,
67  std::size_t)>& A);
71  DistributedMatrix(const BLACSGrid* g, int M, int N,
73  int context_all);
74  DistributedMatrix(const BLACSGrid* g, int M, int N, int MB, int NB);
75  DistributedMatrix(const BLACSGrid* g, int desc[9]);
76 
79  virtual ~DistributedMatrix();
80 
82  operator=(const DistributedMatrix<scalar_t>& m);
84  operator=(DistributedMatrix<scalar_t>&& m);
85 
86 
87  const int* desc() const { return desc_; }
88  int* desc() { return desc_; }
89  bool active() const { return grid() && grid()->active(); }
90 
91  const BLACSGrid* grid() const { return grid_; }
92  const MPIComm& Comm() const { return grid()->Comm(); }
93  MPI_Comm comm() const { return Comm().comm(); }
94 
95  int ctxt() const { return grid() ? grid()->ctxt() : -1; }
96  int ctxt_all() const { return grid() ? grid()->ctxt_all() : -1; }
97 
98  virtual int rows() const { return desc_[2]; }
99  virtual int cols() const { return desc_[3]; }
100  int lrows() const { return lrows_; }
101  int lcols() const { return lcols_; }
102  int ld() const { return lrows_; }
103  int MB() const { return desc_[4]; }
104  int NB() const { return desc_[5]; }
105  int rowblocks() const { return std::ceil(float(lrows()) / MB()); }
106  int colblocks() const { return std::ceil(float(lcols()) / NB()); }
107 
108  virtual int I() const { return 1; }
109  virtual int J() const { return 1; }
110  virtual void lranges(int& rlo, int& rhi, int& clo, int& chi) const;
111 
112  const scalar_t* data() const { return data_; }
113  scalar_t* data() { return data_; }
114  const scalar_t& operator()(int r, int c) const
115  { return data_[r+ld()*c]; }
116  scalar_t& operator()(int r, int c) { return data_[r+ld()*c]; }
117 
118  int prow() const { assert(grid()); return grid()->prow(); }
119  int pcol() const { assert(grid()); return grid()->pcol(); }
120  int nprows() const { assert(grid()); return grid()->nprows(); }
121  int npcols() const { assert(grid()); return grid()->npcols(); }
122  int npactives() const { assert(grid()); return grid()->npactives(); }
123 
124  bool is_master() const { return grid() && prow() == 0 && pcol() == 0; }
125  int rowl2g(int row) const { assert(grid());
126  return indxl2g(row+1, MB(), prow(), 0, nprows()) - I(); }
127  int coll2g(int col) const { assert(grid());
128  return indxl2g(col+1, NB(), pcol(), 0, npcols()) - J(); }
129  int rowg2l(int row) const { assert(grid());
130  return indxg2l(row+I(), MB(), prow(), 0, nprows()) - 1; }
131  int colg2l(int col) const { assert(grid());
132  return indxg2l(col+J(), NB(), pcol(), 0, npcols()) - 1; }
133  int rowg2p(int row) const { assert(grid());
134  return indxg2p(row+I(), MB(), prow(), 0, nprows()); }
135  int colg2p(int col) const { assert(grid());
136  return indxg2p(col+J(), NB(), pcol(), 0, npcols()); }
137  int rank(int r, int c) const {
138  return rowg2p(r) + colg2p(c) * nprows(); }
139  bool is_local(int r, int c) const { assert(grid());
140  return rowg2p(r) == prow() && colg2p(c) == pcol();
141  }
142 
143  bool fixed() const { return MB()==default_MB && NB()==default_NB; }
144  int rowl2g_fixed(int row) const {
145  assert(grid() && fixed());
146  return indxl2g(row+1, default_MB, prow(), 0, nprows()) - I(); }
147  int coll2g_fixed(int col) const {
148  assert(grid() && fixed());
149  return indxl2g(col+1, default_NB, pcol(), 0, npcols()) - J(); }
150  int rowg2l_fixed(int row) const {
151  assert(grid() && fixed());
152  return indxg2l(row+I(), default_MB, prow(), 0, nprows()) - 1; }
153  int colg2l_fixed(int col) const {
154  assert(grid() && fixed());
155  return indxg2l(col+J(), default_NB, pcol(), 0, npcols()) - 1; }
156  int rowg2p_fixed(int row) const {
157  assert(grid() && fixed());
158  return indxg2p(row+I(), default_MB, prow(), 0, nprows()); }
159  int colg2p_fixed(int col) const {
160  assert(grid() && fixed());
161  return indxg2p(col+J(), default_NB, pcol(), 0, npcols()); }
162  int rank_fixed(int r, int c) const {
163  assert(grid() && fixed()); return rowg2p_fixed(r) + colg2p_fixed(c) * nprows(); }
164  bool is_local_fixed(int r, int c) const {
165  assert(grid() && fixed());
166  return rowg2p_fixed(r) == prow() && colg2p_fixed(c) == pcol(); }
167 
168  // TODO fixed versions??
169  const scalar_t& global(int r, int c) const
170  { assert(is_local(r, c)); return operator()(rowg2l(r),colg2l(c)); }
171  scalar_t& global(int r, int c)
172  { assert(is_local(r, c)); return operator()(rowg2l(r),colg2l(c)); }
173  scalar_t& global_fixed(int r, int c) {
174  assert(is_local(r, c)); assert(fixed());
175  return operator()(rowg2l_fixed(r),colg2l_fixed(c)); }
176  void global(int r, int c, scalar_t v) {
177  if (active() && is_local(r, c)) operator()(rowg2l(r),colg2l(c)) = v; }
178  scalar_t all_global(int r, int c) const;
179 
180  void print() const { print("A"); }
181  void print(std::string name, int precision=15) const;
182  void print_to_file(std::string name, std::string filename,
183  int width=8) const;
184  void print_to_files(std::string name, int precision=16) const;
185  void random();
186  void random(random::RandomGeneratorBase<typename RealType<scalar_t>::
187  value_type>& rgen);
188  void zero();
189  void fill(scalar_t a);
190  void fill(const std::function<scalar_t(std::size_t,
191  std::size_t)>& A);
192  void eye();
193  void shift(scalar_t sigma);
194  void clear();
195  virtual void resize(std::size_t m, std::size_t n);
196  virtual void hconcat(const DistributedMatrix<scalar_t>& b);
197  DistributedMatrix<scalar_t> transpose() const;
198 
199  void mult(Trans op, const DistributedMatrix<scalar_t>& X,
200  DistributedMatrix<scalar_t>& Y) const;
201 
202  void laswp(const std::vector<int>& P, bool fwd);
203 
205  extract_rows(const std::vector<std::size_t>& Ir) const;
207  extract_cols(const std::vector<std::size_t>& Ic) const;
208 
210  extract(const std::vector<std::size_t>& I,
211  const std::vector<std::size_t>& J) const;
214  scaled_add(scalar_t alpha, const DistributedMatrix<scalar_t>& B);
216  scale_and_add(scalar_t alpha, const DistributedMatrix<scalar_t>& B);
217 
218  real_t norm() const;
219  real_t normF() const;
220  real_t norm1() const;
221  real_t normI() const;
222 
223  virtual std::size_t memory() const
224  { return sizeof(scalar_t)*std::size_t(lrows())*std::size_t(lcols()); }
225  virtual std::size_t total_memory() const
226  { return sizeof(scalar_t)*std::size_t(rows())*std::size_t(cols()); }
227  virtual std::size_t nonzeros() const
228  { return std::size_t(lrows())*std::size_t(lcols()); }
229  virtual std::size_t total_nonzeros() const
230  { return std::size_t(rows())*std::size_t(cols()); }
231 
232  void scatter(const DenseMatrix<scalar_t>& a);
233  DenseMatrix<scalar_t> gather() const;
234  DenseMatrix<scalar_t> all_gather() const;
235 
236  DenseMatrix<scalar_t> dense_and_clear();
237  DenseMatrix<scalar_t> dense() const;
238  DenseMatrixWrapper<scalar_t> dense_wrapper();
239 
240  std::vector<int> LU();
241  int LU(std::vector<int>&);
242 
244  solve(const DistributedMatrix<scalar_t>& b,
245  const std::vector<int>& piv) const;
246 
248  DistributedMatrix<scalar_t>& Q) const;
249 
250  void orthogonalize(scalar_t& r_max, scalar_t& r_min);
251 
252  void ID_column(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
253  std::vector<std::size_t>& ind,
254  real_t rel_tol, real_t abs_tol, int max_rank);
255  void ID_row(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
256  std::vector<std::size_t>& ind, real_t rel_tol, real_t abs_tol,
257  int max_rank, const BLACSGrid* grid_T);
258 
259  static const int default_MB = STRUMPACK_PBLAS_BLOCKSIZE;
260  static const int default_NB = STRUMPACK_PBLAS_BLOCKSIZE;
261  };
262 
267  template<typename scalar_t> void
268  copy(std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& a,
269  std::size_t ia, std::size_t ja, DenseMatrix<scalar_t>& b,
270  int dest, int context_all);
271 
272  template<typename scalar_t> void
273  copy(std::size_t m, std::size_t n, const DenseMatrix<scalar_t>& a, int src,
274  DistributedMatrix<scalar_t>& b, std::size_t ib, std::size_t jb,
275  int context_all);
276 
278  template<typename scalar_t> void
279  copy(std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& a,
280  std::size_t ia, std::size_t ja, DistributedMatrix<scalar_t>& b,
281  std::size_t ib, std::size_t jb, int context_all);
282 
288  template<typename scalar_t>
289  class DistributedMatrixWrapper : public DistributedMatrix<scalar_t> {
290  private:
291  int _rows, _cols, _i, _j;
292  public:
294  _rows(0), _cols(0), _i(0), _j(0) {}
295 
299  DistributedMatrixWrapper(std::size_t m, std::size_t n,
301  std::size_t i, std::size_t j);
302  DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
303  scalar_t* A);
304  DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
305  int MB, int NB, scalar_t* A);
306  DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
308 
309  virtual ~DistributedMatrixWrapper() { this->data_ = nullptr; }
310 
312  operator=(const DistributedMatrixWrapper<scalar_t>& A);
314  operator=(DistributedMatrixWrapper<scalar_t>&& A);
315 
316  int rows() const override { return _rows; }
317  int cols() const override { return _cols; }
318  int I() const override { return _i+1; }
319  int J() const override { return _j+1; }
320  void lranges(int& rlo, int& rhi, int& clo, int& chi) const override;
321 
322  void resize(std::size_t m, std::size_t n) override { assert(1); }
323  void hconcat(const DistributedMatrix<scalar_t>& b) override { assert(1); }
324  void clear()
325  { this->data_ = nullptr; DistributedMatrix<scalar_t>::clear(); }
326  std::size_t memory() const override { return 0; }
327  std::size_t total_memory() const override { return 0; }
328  std::size_t nonzeros() const override { return 0; }
329  std::size_t total_nonzeros() const override { return 0; }
330 
331  DenseMatrix<scalar_t> dense_and_clear() = delete;
332  DenseMatrixWrapper<scalar_t> dense_wrapper() = delete;
334  operator=(const DistributedMatrix<scalar_t>&) = delete;
336  operator=(DistributedMatrix<scalar_t>&&) = delete;
337  };
338 
339 
340  template<typename scalar_t> long long int
342  if (!a.active()) return 0;
343  return (is_complex<scalar_t>() ? 4:1) *
344  blas::getrf_flops(a.rows(), a.cols()) /
345  a.npactives();
346  }
347 
348  template<typename scalar_t> long long int
349  solve_flops(const DistributedMatrix<scalar_t>& b) {
350  if (!b.active()) return 0;
351  return (is_complex<scalar_t>() ? 4:1) *
352  blas::getrs_flops(b.rows(), b.cols()) /
353  b.npactives();
354  }
355 
356  template<typename scalar_t> long long int
357  LQ_flops(const DistributedMatrix<scalar_t>& a) {
358  if (!a.active()) return 0;
359  auto minrc = std::min(a.rows(), a.cols());
360  return (is_complex<scalar_t>() ? 4:1) *
361  (blas::gelqf_flops(a.rows(), a.cols()) +
362  blas::xxglq_flops(a.cols(), a.cols(), minrc)) /
363  a.npactives();
364  }
365 
366  template<typename scalar_t> long long int
367  ID_row_flops(const DistributedMatrix<scalar_t>& a, int rank) {
368  if (!a.active()) return 0;
369  return (is_complex<scalar_t>() ? 4:1) *
370  (blas::geqp3_flops(a.cols(), a.rows())
371  + blas::trsm_flops(rank, a.cols() - rank, scalar_t(1.), 'L')) /
372  a.npactives();
373  }
374 
375  template<typename scalar_t> long long int
376  trsm_flops(Side s, scalar_t alpha, const DistributedMatrix<scalar_t>& a,
377  const DistributedMatrix<scalar_t>& b) {
378  if (!a.active()) return 0;
379  return (is_complex<scalar_t>() ? 4:1) *
380  blas::trsm_flops(b.rows(), b.cols(), alpha, char(s)) /
381  a.npactives();
382  }
383 
384  template<typename scalar_t> long long int
385  gemm_flops(Trans ta, Trans tb, scalar_t alpha,
386  const DistributedMatrix<scalar_t>& a,
387  const DistributedMatrix<scalar_t>& b, scalar_t beta) {
388  if (!a.active()) return 0;
389  return (is_complex<scalar_t>() ? 4:1) *
391  ((ta==Trans::N) ? a.rows() : a.cols(),
392  (tb==Trans::N) ? b.cols() : b.rows(),
393  (ta==Trans::N) ? a.cols() : a.rows(), alpha, beta) /
394  a.npactives();
395  }
396 
397  template<typename scalar_t> long long int
398  gemv_flops(Trans ta, const DistributedMatrix<scalar_t>& a,
399  scalar_t alpha, scalar_t beta) {
400  if (!a.active()) return 0;
401  auto m = (ta==Trans::N) ? a.rows() : a.cols();
402  auto n = (ta==Trans::N) ? a.cols() : a.rows();
403  return (is_complex<scalar_t>() ? 4:1) *
404  ((alpha != scalar_t(0.)) * m * (n * 2 - 1) +
405  (alpha != scalar_t(1.) && alpha != scalar_t(0.)) * m +
406  (beta != scalar_t(0.) && beta != scalar_t(1.)) * m +
407  (alpha != scalar_t(0.) && beta != scalar_t(0.)) * m) /
408  a.npactives();
409  }
410 
411  template<typename scalar_t> long long int
412  orthogonalize_flops(const DistributedMatrix<scalar_t>& a) {
413  if (!a.active()) return 0;
414  auto minrc = std::min(a.rows(), a.cols());
415  return (is_complex<scalar_t>() ? 4:1) *
416  (blas::geqrf_flops(a.rows(), minrc) +
417  blas::xxgqr_flops(a.rows(), minrc, minrc)) /
418  a.npactives();
419  }
420 
421 
422  template<typename scalar_t>
423  std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
424  ConstDistributedMatrixWrapperPtr
425  (std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& D,
426  std::size_t i, std::size_t j) {
427  return std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
428  (new DistributedMatrixWrapper<scalar_t>
429  (m, n, const_cast<DistributedMatrix<scalar_t>&>(D), i, j));
430  }
431 
432 
433  template<typename scalar_t> void gemm
434  (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix<scalar_t>& A,
435  const DistributedMatrix<scalar_t>& B, scalar_t beta,
436  DistributedMatrix<scalar_t>& C);
437 
438  template<typename scalar_t> void trsm
439  (Side s, UpLo u, Trans ta, Diag d, scalar_t alpha,
440  const DistributedMatrix<scalar_t>& A, DistributedMatrix<scalar_t>& B);
441 
442  template<typename scalar_t> void trsv
443  (UpLo ul, Trans ta, Diag d, const DistributedMatrix<scalar_t>& A,
444  DistributedMatrix<scalar_t>& B);
445 
446  template<typename scalar_t> void gemv
447  (Trans ta, scalar_t alpha, const DistributedMatrix<scalar_t>& A,
448  const DistributedMatrix<scalar_t>& X, scalar_t beta,
449  DistributedMatrix<scalar_t>& Y);
450 
451  template<typename scalar_t> DistributedMatrix<scalar_t> vconcat
452  (int cols, int arows, int brows, const DistributedMatrix<scalar_t>& a,
453  const DistributedMatrix<scalar_t>& b, const BLACSGrid* gnew, int cxt_all);
454 
455  template<typename scalar_t> void subgrid_copy_to_buffers
456  (const DistributedMatrix<scalar_t>& a, const DistributedMatrix<scalar_t>& b,
457  int p0, int npr, int npc, std::vector<std::vector<scalar_t>>& sbuf);
458 
459  template<typename scalar_t> void subproc_copy_to_buffers
460  (const DenseMatrix<scalar_t>& a, const DistributedMatrix<scalar_t>& b,
461  int p0, int npr, int npc, std::vector<std::vector<scalar_t>>& sbuf);
462 
463  template<typename scalar_t> void subgrid_add_from_buffers
464  (const BLACSGrid* subg, int master, DistributedMatrix<scalar_t>& b,
465  std::vector<scalar_t*>& pbuf);
466 
467 } // end namespace strumpack
468 
469 #endif // DISTRIBUTED_MATRIX_HPP
Contains a wrapper class around a BLACS grid/context.
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
Contains some simple C++ MPI wrapper utilities.
This is a small wrapper class around a BLACS grid and a BLACS context.
Definition: BLACSGrid.hpp:66
bool active() const
Definition: BLACSGrid.hpp:264
int npcols() const
Definition: BLACSGrid.hpp:232
int ctxt_all() const
Definition: BLACSGrid.hpp:220
int npactives() const
Definition: BLACSGrid.hpp:257
int pcol() const
Definition: BLACSGrid.hpp:244
const MPIComm & Comm() const
Definition: BLACSGrid.hpp:196
int nprows() const
Definition: BLACSGrid.hpp:226
int ctxt() const
Definition: BLACSGrid.hpp:209
int prow() const
Definition: BLACSGrid.hpp:238
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:289
Definition: DistributedMatrix.hpp:52
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:194
MPI_Comm comm() const
Definition: MPIWrapper.hpp:261
class to wrap the C++11 random number generator/distribution
Definition: RandomWrapper.hpp:105
Definition: StrumpackOptions.hpp:42
long long int LU_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1452
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
Definition: DenseMatrix.hpp:1491
UpLo
Definition: DenseMatrix.hpp:83
DenseMatrix< scalar_t > vconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1280
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
long long int orthogonalize_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1513
long long int trsm_flops(Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1483
Side
Definition: DenseMatrix.hpp:74
void copy(std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
Definition: DenseMatrix.hpp:1231
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
long long int ID_row_flops(const DenseMatrix< scalar_t > &a, int rank)
Definition: DenseMatrix.hpp:1475
long long int LQ_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1466
long long int solve_flops(const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1459
Diag
Definition: DenseMatrix.hpp:92