DistributedMatrix.hpp
Go to the documentation of this file.
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  */
34 #ifndef DISTRIBUTED_MATRIX_HPP
35 #define DISTRIBUTED_MATRIX_HPP
36 
37 #include <vector>
38 #include <string>
39 #include <cmath>
40 #include <functional>
41 
42 #include "misc/MPIWrapper.hpp"
43 #include "misc/RandomWrapper.hpp"
44 #include "DenseMatrix.hpp"
45 #include "BLACSGrid.hpp"
46 
47 namespace strumpack {
48 
49 #ifndef DOXYGEN_SHOULD_SKIP_THIS
50  inline int indxl2g(int INDXLOC, int NB,
51  int IPROC, int ISRCPROC, int NPROCS) {
52  return NPROCS*NB*((INDXLOC-1)/NB) + (INDXLOC-1) % NB +
53  ((NPROCS+IPROC-ISRCPROC) % NPROCS)*NB + 1;
54  }
55  inline int indxg2l(int INDXGLOB, int NB,
56  int IPROC, int ISRCPROC, int NPROCS) {
57  return NB*((INDXGLOB-1)/(NB*NPROCS)) + (INDXGLOB-1) % NB + 1;
58  }
59  inline int indxg2p(int INDXGLOB, int NB,
60  int IPROC, int ISRCPROC, int NPROCS) {
61  return ( ISRCPROC + (INDXGLOB - 1) / NB ) % NPROCS;
62  }
63 #endif
64 
84  template<typename scalar_t> class DistributedMatrix {
85  using real_t = typename RealType<scalar_t>::value_type;
86 
87  public:
88 
93 
105  DistributedMatrix(const BLACSGrid* g, int M, int N);
106 
119  DistributedMatrix(const BLACSGrid* g, int M, int N,
120  const std::function<scalar_t(std::size_t,
121  std::size_t)>& A);
122 
133 
143 
154 
171  DistributedMatrix(const BLACSGrid* g, int M, int N,
173  int context_all);
174 
183  DistributedMatrix(const BLACSGrid* g, int M, int N, int MB, int NB);
184 
185 
190 
195 
200 
207 
214 
215 
220  const int* desc() const { return desc_; }
221 
229  bool active() const { return grid() && grid()->active(); }
230 
235  const BLACSGrid* grid() const { return grid_; }
236 
242  const MPIComm& Comm() const { return grid()->Comm(); }
243 
248  MPI_Comm comm() const { return Comm().comm(); }
249 
254  int ctxt() const { return grid() ? grid()->ctxt() : -1; }
255 
262  int ctxt_all() const { return grid() ? grid()->ctxt_all() : -1; }
263 
268  virtual int rows() const { return desc_[2]; }
273  virtual int cols() const { return desc_[3]; }
278  int lrows() const { return lrows_; }
283  int lcols() const { return lcols_; }
288  int ld() const { return lrows_; }
289 
294  int MB() const { return desc_[4]; }
299  int NB() const { return desc_[5]; }
300 
305  int rowblocks() const { return std::ceil(float(lrows()) / MB()); }
310  int colblocks() const { return std::ceil(float(lcols()) / NB()); }
311 
318  virtual int I() const { return 1; }
325  virtual int J() const { return 1; }
326 
340  virtual void lranges(int& rlo, int& rhi, int& clo, int& chi) const;
341 
346  const scalar_t* data() const { return data_; }
347 
352  scalar_t* data() { return data_; }
353 
363  const scalar_t& operator()(int r, int c) const { return data_[r+ld()*c]; }
364 
374  scalar_t& operator()(int r, int c) { return data_[r+ld()*c]; }
375 
381  int prow() const { assert(grid()); return grid()->prow(); }
387  int pcol() const { assert(grid()); return grid()->pcol(); }
393  int nprows() const { assert(grid()); return grid()->nprows(); }
399  int npcols() const { assert(grid()); return grid()->npcols(); }
406  int npactives() const {
407  assert(grid());
408  return grid()->npactives();
409  }
410 
416  bool is_master() const {
417  return grid() && prow() == 0 && pcol() == 0;
418  }
425  int rowl2g(int row) const {
426  assert(grid());
427  return indxl2g(row+1, MB(), prow(), 0, nprows()) - I();
428  }
435  int coll2g(int col) const {
436  assert(grid());
437  return indxl2g(col+1, NB(), pcol(), 0, npcols()) - J();
438  }
445  int rowg2l(int row) const {
446  assert(grid());
447  return indxg2l(row+I(), MB(), prow(), 0, nprows()) - 1;
448  }
455  int colg2l(int col) const {
456  assert(grid());
457  return indxg2l(col+J(), NB(), pcol(), 0, npcols()) - 1;
458  }
465  int rowg2p(int row) const {
466  assert(grid());
467  return indxg2p(row+I(), MB(), prow(), 0, nprows());
468  }
476  int colg2p(int col) const {
477  assert(grid());
478  return indxg2p(col+J(), NB(), pcol(), 0, npcols());
479  }
488  int rank(int r, int c) const {
489  return rowg2p(r) + colg2p(c) * nprows();
490  }
497  bool is_local(int r, int c) const {
498  assert(grid());
499  return rowg2p(r) == prow() && colg2p(c) == pcol();
500  }
501 
502 #ifndef DOXYGEN_SHOULD_SKIP_THIS
503  bool fixed() const { return MB()==default_MB && NB()==default_NB; }
504  int rowl2g_fixed(int row) const {
505  assert(grid() && fixed());
506  return indxl2g(row+1, default_MB, prow(), 0, nprows()) - I();
507  }
508  int coll2g_fixed(int col) const {
509  assert(grid() && fixed());
510  return indxl2g(col+1, default_NB, pcol(), 0, npcols()) - J();
511  }
512  int rowg2l_fixed(int row) const {
513  assert(grid() && fixed());
514  return indxg2l(row+I(), default_MB, prow(), 0, nprows()) - 1;
515  }
516  int colg2l_fixed(int col) const {
517  assert(grid() && fixed());
518  return indxg2l(col+J(), default_NB, pcol(), 0, npcols()) - 1;
519  }
520  int rowg2p_fixed(int row) const {
521  assert(grid() && fixed());
522  return indxg2p(row+I(), default_MB, prow(), 0, nprows());
523  }
524  int colg2p_fixed(int col) const {
525  assert(grid() && fixed());
526  return indxg2p(col+J(), default_NB, pcol(), 0, npcols());
527  }
528  int rank_fixed(int r, int c) const {
529  assert(grid() && fixed()); return rowg2p_fixed(r) + colg2p_fixed(c) * nprows();
530  }
531  bool is_local_fixed(int r, int c) const {
532  assert(grid() && fixed());
533  return rowg2p_fixed(r) == prow() && colg2p_fixed(c) == pcol();
534  }
535  scalar_t& global_fixed(int r, int c) {
536  assert(is_local(r, c)); assert(fixed());
537  return operator()(rowg2l_fixed(r),colg2l_fixed(c));
538  }
539 #endif
540 
551  const scalar_t& global(int r, int c) const {
552  assert(is_local(r, c));
553  return operator()(rowg2l(r),colg2l(c));
554  }
565  scalar_t& global(int r, int c) {
566  assert(is_local(r, c)); return operator()(rowg2l(r),colg2l(c));
567  }
578  void global(int r, int c, scalar_t v) {
579  if (active() && is_local(r, c)) operator()(rowg2l(r),colg2l(c)) = v;
580  }
592  scalar_t all_global(int r, int c) const;
593 
594  void print() const { print("A"); }
595  void print(std::string name, int precision=15) const;
596  void print_to_file(std::string name, std::string filename,
597  int width=8) const;
598  void print_to_files(std::string name, int precision=16) const;
599  void random();
600  void random(random::RandomGeneratorBase<typename RealType<scalar_t>::
601  value_type>& rgen);
602  void zero();
603  void fill(scalar_t a);
604  void fill(const std::function<scalar_t(std::size_t,
605  std::size_t)>& A);
606  void eye();
607  void shift(scalar_t sigma);
608  scalar_t trace() const;
609  void clear();
610  virtual void resize(std::size_t m, std::size_t n);
611  virtual void hconcat(const DistributedMatrix<scalar_t>& b);
612  DistributedMatrix<scalar_t> transpose() const;
613 
614  void mult(Trans op, const DistributedMatrix<scalar_t>& X,
615  DistributedMatrix<scalar_t>& Y) const;
616 
617  void laswp(const std::vector<int>& P, bool fwd);
618 
620  extract_rows(const std::vector<std::size_t>& Ir) const;
622  extract_cols(const std::vector<std::size_t>& Ic) const;
623 
625  extract(const std::vector<std::size_t>& I,
626  const std::vector<std::size_t>& J) const;
629  scaled_add(scalar_t alpha, const DistributedMatrix<scalar_t>& B);
631  scale_and_add(scalar_t alpha, const DistributedMatrix<scalar_t>& B);
632 
633  real_t norm() const;
634  real_t normF() const;
635  real_t norm1() const;
636  real_t normI() const;
637 
638  virtual std::size_t memory() const {
639  return sizeof(scalar_t)*std::size_t(lrows())*std::size_t(lcols());
640  }
641  virtual std::size_t total_memory() const {
642  return sizeof(scalar_t)*std::size_t(rows())*std::size_t(cols());
643  }
644  virtual std::size_t nonzeros() const {
645  return std::size_t(lrows())*std::size_t(lcols());
646  }
647  virtual std::size_t total_nonzeros() const {
648  return std::size_t(rows())*std::size_t(cols());
649  }
650 
651  void scatter(const DenseMatrix<scalar_t>& a);
652  DenseMatrix<scalar_t> gather() const;
653  DenseMatrix<scalar_t> all_gather() const;
654 
655  DenseMatrix<scalar_t> dense_and_clear();
656  DenseMatrix<scalar_t> dense() const;
657  DenseMatrixWrapper<scalar_t> dense_wrapper();
658 
659  std::vector<int> LU();
660  int LU(std::vector<int>&);
661 
662  DistributedMatrix<scalar_t>
663  solve(const DistributedMatrix<scalar_t>& b,
664  const std::vector<int>& piv) const;
665 
666  void LQ(DistributedMatrix<scalar_t>& L,
667  DistributedMatrix<scalar_t>& Q) const;
668 
669  void orthogonalize(scalar_t& r_max, scalar_t& r_min);
670 
671  void ID_column(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
672  std::vector<std::size_t>& ind,
673  real_t rel_tol, real_t abs_tol, int max_rank);
674  void ID_row(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
675  std::vector<std::size_t>& ind, real_t rel_tol, real_t abs_tol,
676  int max_rank, const BLACSGrid* grid_T);
677 
690  static const int default_MB = STRUMPACK_PBLAS_BLOCKSIZE;
697  static const int default_NB = STRUMPACK_PBLAS_BLOCKSIZE;
698 
699  protected:
700  const BLACSGrid* grid_ = nullptr;
701  scalar_t* data_ = nullptr;
702  int lrows_;
703  int lcols_;
704  int desc_[9];
705  };
706 
711  template<typename scalar_t> void
712  copy(std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& a,
713  std::size_t ia, std::size_t ja, DenseMatrix<scalar_t>& b,
714  int dest, int context_all);
715 
716  template<typename scalar_t> void
717  copy(std::size_t m, std::size_t n, const DenseMatrix<scalar_t>& a, int src,
718  DistributedMatrix<scalar_t>& b, std::size_t ib, std::size_t jb,
719  int context_all);
720 
722  template<typename scalar_t> void
723  copy(std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& a,
724  std::size_t ia, std::size_t ja, DistributedMatrix<scalar_t>& b,
725  std::size_t ib, std::size_t jb, int context_all);
726 
732  template<typename scalar_t>
733  class DistributedMatrixWrapper : public DistributedMatrix<scalar_t> {
734  private:
735  int _rows, _cols, _i, _j;
736  public:
738  _rows(0), _cols(0), _i(0), _j(0) {}
739 
743  DistributedMatrixWrapper(std::size_t m, std::size_t n,
745  std::size_t i, std::size_t j);
746  DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
747  scalar_t* A);
748  DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
749  int MB, int NB, scalar_t* A);
750  DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
752 
753  virtual ~DistributedMatrixWrapper() { this->data_ = nullptr; }
754 
756  operator=(const DistributedMatrixWrapper<scalar_t>& A);
758  operator=(DistributedMatrixWrapper<scalar_t>&& A);
759 
760  int rows() const override { return _rows; }
761  int cols() const override { return _cols; }
762  int I() const override { return _i+1; }
763  int J() const override { return _j+1; }
764  void lranges(int& rlo, int& rhi, int& clo, int& chi) const override;
765 
766  void resize(std::size_t m, std::size_t n) override { assert(1); }
767  void hconcat(const DistributedMatrix<scalar_t>& b) override { assert(1); }
768  void clear()
769  { this->data_ = nullptr; DistributedMatrix<scalar_t>::clear(); }
770  std::size_t memory() const override { return 0; }
771  std::size_t total_memory() const override { return 0; }
772  std::size_t nonzeros() const override { return 0; }
773  std::size_t total_nonzeros() const override { return 0; }
774 
775  DenseMatrix<scalar_t> dense_and_clear() = delete;
776  DenseMatrixWrapper<scalar_t> dense_wrapper() = delete;
777  DistributedMatrixWrapper<scalar_t>&
778  operator=(const DistributedMatrix<scalar_t>&) = delete;
779  DistributedMatrixWrapper<scalar_t>&
780  operator=(DistributedMatrix<scalar_t>&&) = delete;
781  };
782 
783 
784  template<typename scalar_t> long long int
785  LU_flops(const DistributedMatrix<scalar_t>& a) {
786  if (!a.active()) return 0;
787  return (is_complex<scalar_t>() ? 4:1) *
788  blas::getrf_flops(a.rows(), a.cols()) /
789  a.npactives();
790  }
791 
792  template<typename scalar_t> long long int
793  solve_flops(const DistributedMatrix<scalar_t>& b) {
794  if (!b.active()) return 0;
795  return (is_complex<scalar_t>() ? 4:1) *
796  blas::getrs_flops(b.rows(), b.cols()) /
797  b.npactives();
798  }
799 
800  template<typename scalar_t> long long int
801  LQ_flops(const DistributedMatrix<scalar_t>& a) {
802  if (!a.active()) return 0;
803  auto minrc = std::min(a.rows(), a.cols());
804  return (is_complex<scalar_t>() ? 4:1) *
805  (blas::gelqf_flops(a.rows(), a.cols()) +
806  blas::xxglq_flops(a.cols(), a.cols(), minrc)) /
807  a.npactives();
808  }
809 
810  template<typename scalar_t> long long int
811  ID_row_flops(const DistributedMatrix<scalar_t>& a, int rank) {
812  if (!a.active()) return 0;
813  return (is_complex<scalar_t>() ? 4:1) *
814  (blas::geqp3_flops(a.cols(), a.rows())
815  + blas::trsm_flops(rank, a.cols() - rank, scalar_t(1.), 'L')) /
816  a.npactives();
817  }
818 
819  template<typename scalar_t> long long int
820  trsm_flops(Side s, scalar_t alpha, const DistributedMatrix<scalar_t>& a,
821  const DistributedMatrix<scalar_t>& b) {
822  if (!a.active()) return 0;
823  return (is_complex<scalar_t>() ? 4:1) *
824  blas::trsm_flops(b.rows(), b.cols(), alpha, char(s)) /
825  a.npactives();
826  }
827 
828  template<typename scalar_t> long long int
829  gemm_flops(Trans ta, Trans tb, scalar_t alpha,
830  const DistributedMatrix<scalar_t>& a,
831  const DistributedMatrix<scalar_t>& b, scalar_t beta) {
832  if (!a.active()) return 0;
833  return (is_complex<scalar_t>() ? 4:1) *
834  blas::gemm_flops
835  ((ta==Trans::N) ? a.rows() : a.cols(),
836  (tb==Trans::N) ? b.cols() : b.rows(),
837  (ta==Trans::N) ? a.cols() : a.rows(), alpha, beta) /
838  a.npactives();
839  }
840 
841  template<typename scalar_t> long long int
842  gemv_flops(Trans ta, const DistributedMatrix<scalar_t>& a,
843  scalar_t alpha, scalar_t beta) {
844  if (!a.active()) return 0;
845  auto m = (ta==Trans::N) ? a.rows() : a.cols();
846  auto n = (ta==Trans::N) ? a.cols() : a.rows();
847  return (is_complex<scalar_t>() ? 4:1) *
848  ((alpha != scalar_t(0.)) * m * (n * 2 - 1) +
849  (alpha != scalar_t(1.) && alpha != scalar_t(0.)) * m +
850  (beta != scalar_t(0.) && beta != scalar_t(1.)) * m +
851  (alpha != scalar_t(0.) && beta != scalar_t(0.)) * m) /
852  a.npactives();
853  }
854 
855  template<typename scalar_t> long long int
856  orthogonalize_flops(const DistributedMatrix<scalar_t>& a) {
857  if (!a.active()) return 0;
858  auto minrc = std::min(a.rows(), a.cols());
859  return (is_complex<scalar_t>() ? 4:1) *
860  (blas::geqrf_flops(a.rows(), minrc) +
861  blas::xxgqr_flops(a.rows(), minrc, minrc)) /
862  a.npactives();
863  }
864 
865 
866  template<typename scalar_t>
867  std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
868  ConstDistributedMatrixWrapperPtr
869  (std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& D,
870  std::size_t i, std::size_t j) {
871  return std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
872  (new DistributedMatrixWrapper<scalar_t>
873  (m, n, const_cast<DistributedMatrix<scalar_t>&>(D), i, j));
874  }
875 
876 
877  template<typename scalar_t> void gemm
878  (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix<scalar_t>& A,
879  const DistributedMatrix<scalar_t>& B, scalar_t beta,
880  DistributedMatrix<scalar_t>& C);
881 
882  template<typename scalar_t> void trsm
883  (Side s, UpLo u, Trans ta, Diag d, scalar_t alpha,
884  const DistributedMatrix<scalar_t>& A, DistributedMatrix<scalar_t>& B);
885 
886  template<typename scalar_t> void trsv
887  (UpLo ul, Trans ta, Diag d, const DistributedMatrix<scalar_t>& A,
888  DistributedMatrix<scalar_t>& B);
889 
890  template<typename scalar_t> void gemv
891  (Trans ta, scalar_t alpha, const DistributedMatrix<scalar_t>& A,
892  const DistributedMatrix<scalar_t>& X, scalar_t beta,
893  DistributedMatrix<scalar_t>& Y);
894 
895  template<typename scalar_t> DistributedMatrix<scalar_t> vconcat
896  (int cols, int arows, int brows, const DistributedMatrix<scalar_t>& a,
897  const DistributedMatrix<scalar_t>& b, const BLACSGrid* gnew, int cxt_all);
898 
899  template<typename scalar_t> void subgrid_copy_to_buffers
900  (const DistributedMatrix<scalar_t>& a, const DistributedMatrix<scalar_t>& b,
901  int p0, int npr, int npc, std::vector<std::vector<scalar_t>>& sbuf);
902 
903  template<typename scalar_t> void subproc_copy_to_buffers
904  (const DenseMatrix<scalar_t>& a, const DistributedMatrix<scalar_t>& b,
905  int p0, int npr, int npc, std::vector<std::vector<scalar_t>>& sbuf);
906 
907  template<typename scalar_t> void subgrid_add_from_buffers
908  (const BLACSGrid* subg, int master, DistributedMatrix<scalar_t>& b,
909  std::vector<scalar_t*>& pbuf);
910 
911 } // end namespace strumpack
912 
913 #endif // DISTRIBUTED_MATRIX_HPP
Contains a wrapper class around a BLACS grid/context.
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
Contains some simple C++ MPI wrapper utilities.
This is a small wrapper class around a BLACS grid and a BLACS context.
Definition: BLACSGrid.hpp:66
bool active() const
Definition: BLACSGrid.hpp:264
int npcols() const
Definition: BLACSGrid.hpp:232
int ctxt_all() const
Definition: BLACSGrid.hpp:220
int npactives() const
Definition: BLACSGrid.hpp:257
int pcol() const
Definition: BLACSGrid.hpp:244
const MPIComm & Comm() const
Definition: BLACSGrid.hpp:196
int nprows() const
Definition: BLACSGrid.hpp:226
int ctxt() const
Definition: BLACSGrid.hpp:209
int prow() const
Definition: BLACSGrid.hpp:238
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:733
int J() const override
Definition: DistributedMatrix.hpp:763
int I() const override
Definition: DistributedMatrix.hpp:762
int rows() const override
Definition: DistributedMatrix.hpp:760
void lranges(int &rlo, int &rhi, int &clo, int &chi) const override
int cols() const override
Definition: DistributedMatrix.hpp:761
2D block cyclicly distributed matrix, as used by ScaLAPACK.
Definition: DistributedMatrix.hpp:84
const scalar_t * data() const
Definition: DistributedMatrix.hpp:346
int prow() const
Definition: DistributedMatrix.hpp:381
DistributedMatrix(const BLACSGrid *g, DenseMatrix< scalar_t > &&m)
DistributedMatrix(const DistributedMatrix< scalar_t > &m)
int ctxt_all() const
Definition: DistributedMatrix.hpp:262
int lcols() const
Definition: DistributedMatrix.hpp:283
virtual int I() const
Definition: DistributedMatrix.hpp:318
scalar_t & global(int r, int c)
Definition: DistributedMatrix.hpp:565
DistributedMatrix< scalar_t > & operator=(const DistributedMatrix< scalar_t > &m)
int rowblocks() const
Definition: DistributedMatrix.hpp:305
bool is_master() const
Definition: DistributedMatrix.hpp:416
DistributedMatrix< scalar_t > & operator=(DistributedMatrix< scalar_t > &&m)
int npactives() const
Definition: DistributedMatrix.hpp:406
const int * desc() const
Definition: DistributedMatrix.hpp:220
const BLACSGrid * grid() const
Definition: DistributedMatrix.hpp:235
int MB() const
Definition: DistributedMatrix.hpp:294
int rowl2g(int row) const
Definition: DistributedMatrix.hpp:425
static const int default_MB
Definition: DistributedMatrix.hpp:690
int pcol() const
Definition: DistributedMatrix.hpp:387
virtual int rows() const
Definition: DistributedMatrix.hpp:268
scalar_t & operator()(int r, int c)
Definition: DistributedMatrix.hpp:374
static const int default_NB
Definition: DistributedMatrix.hpp:697
int npcols() const
Definition: DistributedMatrix.hpp:399
DistributedMatrix(const BLACSGrid *g, int M, int N, const std::function< scalar_t(std::size_t, std::size_t)> &A)
bool is_local(int r, int c) const
Definition: DistributedMatrix.hpp:497
DistributedMatrix(const BLACSGrid *g, DenseMatrixWrapper< scalar_t > &&m)
const scalar_t & operator()(int r, int c) const
Definition: DistributedMatrix.hpp:363
int rank(int r, int c) const
Definition: DistributedMatrix.hpp:488
DistributedMatrix(const BLACSGrid *g, int M, int N, int MB, int NB)
int colblocks() const
Definition: DistributedMatrix.hpp:310
DistributedMatrix(const BLACSGrid *g, int M, int N)
scalar_t all_global(int r, int c) const
int coll2g(int col) const
Definition: DistributedMatrix.hpp:435
DistributedMatrix(const BLACSGrid *g, const DenseMatrix< scalar_t > &m)
int nprows() const
Definition: DistributedMatrix.hpp:393
MPI_Comm comm() const
Definition: DistributedMatrix.hpp:248
void global(int r, int c, scalar_t v)
Definition: DistributedMatrix.hpp:578
bool active() const
Definition: DistributedMatrix.hpp:229
int ld() const
Definition: DistributedMatrix.hpp:288
int colg2p(int col) const
Definition: DistributedMatrix.hpp:476
virtual int cols() const
Definition: DistributedMatrix.hpp:273
int ctxt() const
Definition: DistributedMatrix.hpp:254
DistributedMatrix(const BLACSGrid *g, int M, int N, const DistributedMatrix< scalar_t > &m, int context_all)
scalar_t * data()
Definition: DistributedMatrix.hpp:352
int lrows() const
Definition: DistributedMatrix.hpp:278
virtual int J() const
Definition: DistributedMatrix.hpp:325
const scalar_t & global(int r, int c) const
Definition: DistributedMatrix.hpp:551
int rowg2p(int row) const
Definition: DistributedMatrix.hpp:465
virtual void lranges(int &rlo, int &rhi, int &clo, int &chi) const
int colg2l(int col) const
Definition: DistributedMatrix.hpp:455
int rowg2l(int row) const
Definition: DistributedMatrix.hpp:445
const MPIComm & Comm() const
Definition: DistributedMatrix.hpp:242
DistributedMatrix(DistributedMatrix< scalar_t > &&m)
int NB() const
Definition: DistributedMatrix.hpp:299
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:194
MPI_Comm comm() const
Definition: MPIWrapper.hpp:261
class to wrap the C++11 random number generator/distribution
Definition: RandomWrapper.hpp:105
Definition: StrumpackOptions.hpp:43
long long int LU_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1452
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
Definition: DenseMatrix.hpp:1491
UpLo
Definition: DenseMatrix.hpp:83
DenseMatrix< scalar_t > vconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1280
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
long long int orthogonalize_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1513
long long int trsm_flops(Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1483
Side
Definition: DenseMatrix.hpp:74
void copy(std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
Definition: DenseMatrix.hpp:1231
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
long long int ID_row_flops(const DenseMatrix< scalar_t > &a, int rank)
Definition: DenseMatrix.hpp:1475
long long int LQ_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1466
long long int solve_flops(const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1459
Diag
Definition: DenseMatrix.hpp:92