34 #ifndef DISTRIBUTED_MATRIX_HPP
35 #define DISTRIBUTED_MATRIX_HPP
43 #include "misc/RandomWrapper.hpp"
49 #ifndef DOXYGEN_SHOULD_SKIP_THIS
50 inline int indxl2g(
int INDXLOC,
int NB,
51 int IPROC,
int ISRCPROC,
int NPROCS) {
52 return NPROCS*NB*((INDXLOC-1)/NB) + (INDXLOC-1) % NB +
53 ((NPROCS+IPROC-ISRCPROC) % NPROCS)*NB + 1;
55 inline int indxg2l(
int INDXGLOB,
int NB,
56 int IPROC,
int ISRCPROC,
int NPROCS) {
57 return NB*((INDXGLOB-1)/(NB*NPROCS)) + (INDXGLOB-1) % NB + 1;
59 inline int indxg2p(
int INDXGLOB,
int NB,
60 int IPROC,
int ISRCPROC,
int NPROCS) {
61 return ( ISRCPROC + (INDXGLOB - 1) / NB ) % NPROCS;
85 using real_t =
typename RealType<scalar_t>::value_type;
120 const std::function<scalar_t(std::size_t,
220 const int*
desc()
const {
return desc_; }
268 virtual int rows()
const {
return desc_[2]; }
273 virtual int cols()
const {
return desc_[3]; }
278 int lrows()
const {
return lrows_; }
283 int lcols()
const {
return lcols_; }
288 int ld()
const {
return lrows_; }
294 int MB()
const {
return desc_[4]; }
299 int NB()
const {
return desc_[5]; }
318 virtual int I()
const {
return 1; }
325 virtual int J()
const {
return 1; }
340 virtual void lranges(
int& rlo,
int& rhi,
int& clo,
int& chi)
const;
346 const scalar_t*
data()
const {
return data_; }
352 scalar_t*
data() {
return data_; }
363 const scalar_t&
operator()(
int r,
int c)
const {
return data_[r+
ld()*c]; }
502 #ifndef DOXYGEN_SHOULD_SKIP_THIS
504 int rowl2g_fixed(
int row)
const {
505 assert(
grid() && fixed());
508 int coll2g_fixed(
int col)
const {
509 assert(
grid() && fixed());
512 int rowg2l_fixed(
int row)
const {
513 assert(
grid() && fixed());
516 int colg2l_fixed(
int col)
const {
517 assert(
grid() && fixed());
520 int rowg2p_fixed(
int row)
const {
521 assert(
grid() && fixed());
524 int colg2p_fixed(
int col)
const {
525 assert(
grid() && fixed());
528 int rank_fixed(
int r,
int c)
const {
529 assert(
grid() && fixed());
return rowg2p_fixed(r) + colg2p_fixed(c) *
nprows();
531 bool is_local_fixed(
int r,
int c)
const {
532 assert(
grid() && fixed());
533 return rowg2p_fixed(r) ==
prow() && colg2p_fixed(c) ==
pcol();
535 scalar_t& global_fixed(
int r,
int c) {
536 assert(
is_local(r, c)); assert(fixed());
537 return operator()(rowg2l_fixed(r),colg2l_fixed(c));
551 const scalar_t&
global(
int r,
int c)
const {
594 void print()
const { print(
"A"); }
595 void print(std::string name,
int precision=15)
const;
596 void print_to_file(std::string name, std::string filename,
598 void print_to_files(std::string name,
int precision=16)
const;
603 void fill(scalar_t a);
604 void fill(
const std::function<scalar_t(std::size_t,
607 void shift(scalar_t sigma);
608 scalar_t trace()
const;
610 virtual void resize(std::size_t m, std::size_t n);
617 void laswp(
const std::vector<int>& P,
bool fwd);
620 extract_rows(
const std::vector<std::size_t>& Ir)
const;
622 extract_cols(
const std::vector<std::size_t>& Ic)
const;
625 extract(
const std::vector<std::size_t>&
I,
626 const std::vector<std::size_t>&
J)
const;
634 real_t normF()
const;
635 real_t norm1()
const;
636 real_t normI()
const;
638 virtual std::size_t memory()
const {
639 return sizeof(scalar_t)*std::size_t(
lrows())*std::size_t(
lcols());
641 virtual std::size_t total_memory()
const {
642 return sizeof(scalar_t)*std::size_t(
rows())*std::size_t(
cols());
644 virtual std::size_t nonzeros()
const {
645 return std::size_t(
lrows())*std::size_t(
lcols());
647 virtual std::size_t total_nonzeros()
const {
648 return std::size_t(
rows())*std::size_t(
cols());
651 void scatter(
const DenseMatrix<scalar_t>& a);
652 DenseMatrix<scalar_t> gather()
const;
653 DenseMatrix<scalar_t> all_gather()
const;
655 DenseMatrix<scalar_t> dense_and_clear();
656 DenseMatrix<scalar_t> dense()
const;
657 DenseMatrixWrapper<scalar_t> dense_wrapper();
659 std::vector<int> LU();
660 int LU(std::vector<int>&);
662 DistributedMatrix<scalar_t>
663 solve(
const DistributedMatrix<scalar_t>& b,
664 const std::vector<int>& piv)
const;
666 void LQ(DistributedMatrix<scalar_t>& L,
667 DistributedMatrix<scalar_t>& Q)
const;
669 void orthogonalize(scalar_t& r_max, scalar_t& r_min);
671 void ID_column(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
672 std::vector<std::size_t>& ind,
673 real_t rel_tol, real_t abs_tol,
int max_rank);
674 void ID_row(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
675 std::vector<std::size_t>& ind, real_t rel_tol, real_t abs_tol,
676 int max_rank,
const BLACSGrid* grid_T);
701 scalar_t* data_ =
nullptr;
711 template<
typename scalar_t>
void
714 int dest,
int context_all);
716 template<
typename scalar_t>
void
722 template<
typename scalar_t>
void
725 std::size_t ib, std::size_t jb,
int context_all);
732 template<
typename scalar_t>
735 int _rows, _cols, _i, _j;
738 _rows(0), _cols(0), _i(0), _j(0) {}
745 std::size_t i, std::size_t j);
749 int MB,
int NB, scalar_t* A);
760 int rows()
const override {
return _rows; }
761 int cols()
const override {
return _cols; }
762 int I()
const override {
return _i+1; }
763 int J()
const override {
return _j+1; }
764 void lranges(
int& rlo,
int& rhi,
int& clo,
int& chi)
const override;
766 void resize(std::size_t m, std::size_t n)
override { assert(1); }
769 { this->data_ =
nullptr; DistributedMatrix<scalar_t>::clear(); }
770 std::size_t memory()
const override {
return 0; }
771 std::size_t total_memory()
const override {
return 0; }
772 std::size_t nonzeros()
const override {
return 0; }
773 std::size_t total_nonzeros()
const override {
return 0; }
775 DenseMatrix<scalar_t> dense_and_clear() =
delete;
776 DenseMatrixWrapper<scalar_t> dense_wrapper() =
delete;
777 DistributedMatrixWrapper<scalar_t>&
778 operator=(
const DistributedMatrix<scalar_t>&) =
delete;
779 DistributedMatrixWrapper<scalar_t>&
780 operator=(DistributedMatrix<scalar_t>&&) =
delete;
784 template<
typename scalar_t>
long long int
785 LU_flops(
const DistributedMatrix<scalar_t>& a) {
786 if (!a.active())
return 0;
787 return (is_complex<scalar_t>() ? 4:1) *
788 blas::getrf_flops(a.rows(), a.cols()) /
792 template<
typename scalar_t>
long long int
793 solve_flops(
const DistributedMatrix<scalar_t>& b) {
794 if (!b.active())
return 0;
795 return (is_complex<scalar_t>() ? 4:1) *
796 blas::getrs_flops(b.rows(), b.cols()) /
800 template<
typename scalar_t>
long long int
801 LQ_flops(
const DistributedMatrix<scalar_t>& a) {
802 if (!a.active())
return 0;
803 auto minrc = std::min(a.rows(), a.cols());
804 return (is_complex<scalar_t>() ? 4:1) *
805 (blas::gelqf_flops(a.rows(), a.cols()) +
806 blas::xxglq_flops(a.cols(), a.cols(), minrc)) /
810 template<
typename scalar_t>
long long int
811 ID_row_flops(
const DistributedMatrix<scalar_t>& a,
int rank) {
812 if (!a.active())
return 0;
813 return (is_complex<scalar_t>() ? 4:1) *
814 (blas::geqp3_flops(a.cols(), a.rows())
815 + blas::trsm_flops(rank, a.cols() - rank, scalar_t(1.),
'L')) /
819 template<
typename scalar_t>
long long int
820 trsm_flops(
Side s, scalar_t alpha,
const DistributedMatrix<scalar_t>& a,
821 const DistributedMatrix<scalar_t>& b) {
822 if (!a.active())
return 0;
823 return (is_complex<scalar_t>() ? 4:1) *
824 blas::trsm_flops(b.rows(), b.cols(), alpha,
char(s)) /
828 template<
typename scalar_t>
long long int
830 const DistributedMatrix<scalar_t>& a,
831 const DistributedMatrix<scalar_t>& b, scalar_t beta) {
832 if (!a.active())
return 0;
833 return (is_complex<scalar_t>() ? 4:1) *
835 ((ta==
Trans::N) ? a.rows() : a.cols(),
836 (tb==
Trans::N) ? b.cols() : b.rows(),
837 (ta==
Trans::N) ? a.cols() : a.rows(), alpha, beta) /
841 template<
typename scalar_t>
long long int
842 gemv_flops(
Trans ta,
const DistributedMatrix<scalar_t>& a,
843 scalar_t alpha, scalar_t beta) {
844 if (!a.active())
return 0;
845 auto m = (ta==
Trans::N) ? a.rows() : a.cols();
846 auto n = (ta==
Trans::N) ? a.cols() : a.rows();
847 return (is_complex<scalar_t>() ? 4:1) *
848 ((alpha != scalar_t(0.)) * m * (n * 2 - 1) +
849 (alpha != scalar_t(1.) && alpha != scalar_t(0.)) * m +
850 (beta != scalar_t(0.) && beta != scalar_t(1.)) * m +
851 (alpha != scalar_t(0.) && beta != scalar_t(0.)) * m) /
855 template<
typename scalar_t>
long long int
857 if (!a.active())
return 0;
858 auto minrc = std::min(a.rows(), a.cols());
859 return (is_complex<scalar_t>() ? 4:1) *
860 (blas::geqrf_flops(a.rows(), minrc) +
861 blas::xxgqr_flops(a.rows(), minrc, minrc)) /
866 template<
typename scalar_t>
867 std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
868 ConstDistributedMatrixWrapperPtr
869 (std::size_t m, std::size_t n,
const DistributedMatrix<scalar_t>& D,
870 std::size_t i, std::size_t j) {
871 return std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
872 (
new DistributedMatrixWrapper<scalar_t>
873 (m, n,
const_cast<DistributedMatrix<scalar_t>&
>(D), i, j));
877 template<
typename scalar_t>
void gemm
878 (
Trans ta,
Trans tb, scalar_t alpha,
const DistributedMatrix<scalar_t>& A,
879 const DistributedMatrix<scalar_t>& B, scalar_t beta,
880 DistributedMatrix<scalar_t>& C);
882 template<
typename scalar_t>
void trsm
884 const DistributedMatrix<scalar_t>& A, DistributedMatrix<scalar_t>& B);
886 template<
typename scalar_t>
void trsv
887 (
UpLo ul,
Trans ta,
Diag d,
const DistributedMatrix<scalar_t>& A,
888 DistributedMatrix<scalar_t>& B);
890 template<
typename scalar_t>
void gemv
891 (
Trans ta, scalar_t alpha,
const DistributedMatrix<scalar_t>& A,
892 const DistributedMatrix<scalar_t>& X, scalar_t beta,
893 DistributedMatrix<scalar_t>& Y);
895 template<
typename scalar_t> DistributedMatrix<scalar_t>
vconcat
896 (
int cols,
int arows,
int brows,
const DistributedMatrix<scalar_t>& a,
897 const DistributedMatrix<scalar_t>& b,
const BLACSGrid* gnew,
int cxt_all);
899 template<
typename scalar_t>
void subgrid_copy_to_buffers
900 (
const DistributedMatrix<scalar_t>& a,
const DistributedMatrix<scalar_t>& b,
901 int p0,
int npr,
int npc, std::vector<std::vector<scalar_t>>& sbuf);
903 template<
typename scalar_t>
void subproc_copy_to_buffers
904 (
const DenseMatrix<scalar_t>& a,
const DistributedMatrix<scalar_t>& b,
905 int p0,
int npr,
int npc, std::vector<std::vector<scalar_t>>& sbuf);
907 template<
typename scalar_t>
void subgrid_add_from_buffers
908 (
const BLACSGrid* subg,
int master, DistributedMatrix<scalar_t>& b,
909 std::vector<scalar_t*>& pbuf);
Contains a wrapper class around a BLACS grid/context.
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
Contains some simple C++ MPI wrapper utilities.
This is a small wrapper class around a BLACS grid and a BLACS context.
Definition: BLACSGrid.hpp:66
bool active() const
Definition: BLACSGrid.hpp:264
int npcols() const
Definition: BLACSGrid.hpp:232
int ctxt_all() const
Definition: BLACSGrid.hpp:220
int npactives() const
Definition: BLACSGrid.hpp:257
int pcol() const
Definition: BLACSGrid.hpp:244
const MPIComm & Comm() const
Definition: BLACSGrid.hpp:196
int nprows() const
Definition: BLACSGrid.hpp:226
int ctxt() const
Definition: BLACSGrid.hpp:209
int prow() const
Definition: BLACSGrid.hpp:238
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:733
int J() const override
Definition: DistributedMatrix.hpp:763
int I() const override
Definition: DistributedMatrix.hpp:762
int rows() const override
Definition: DistributedMatrix.hpp:760
void lranges(int &rlo, int &rhi, int &clo, int &chi) const override
int cols() const override
Definition: DistributedMatrix.hpp:761
2D block cyclicly distributed matrix, as used by ScaLAPACK.
Definition: DistributedMatrix.hpp:84
const scalar_t * data() const
Definition: DistributedMatrix.hpp:346
int prow() const
Definition: DistributedMatrix.hpp:381
DistributedMatrix(const BLACSGrid *g, DenseMatrix< scalar_t > &&m)
DistributedMatrix(const DistributedMatrix< scalar_t > &m)
int ctxt_all() const
Definition: DistributedMatrix.hpp:262
int lcols() const
Definition: DistributedMatrix.hpp:283
virtual int I() const
Definition: DistributedMatrix.hpp:318
scalar_t & global(int r, int c)
Definition: DistributedMatrix.hpp:565
DistributedMatrix< scalar_t > & operator=(const DistributedMatrix< scalar_t > &m)
int rowblocks() const
Definition: DistributedMatrix.hpp:305
bool is_master() const
Definition: DistributedMatrix.hpp:416
DistributedMatrix< scalar_t > & operator=(DistributedMatrix< scalar_t > &&m)
int npactives() const
Definition: DistributedMatrix.hpp:406
const int * desc() const
Definition: DistributedMatrix.hpp:220
const BLACSGrid * grid() const
Definition: DistributedMatrix.hpp:235
int MB() const
Definition: DistributedMatrix.hpp:294
int rowl2g(int row) const
Definition: DistributedMatrix.hpp:425
static const int default_MB
Definition: DistributedMatrix.hpp:690
int pcol() const
Definition: DistributedMatrix.hpp:387
virtual int rows() const
Definition: DistributedMatrix.hpp:268
scalar_t & operator()(int r, int c)
Definition: DistributedMatrix.hpp:374
static const int default_NB
Definition: DistributedMatrix.hpp:697
int npcols() const
Definition: DistributedMatrix.hpp:399
DistributedMatrix(const BLACSGrid *g, int M, int N, const std::function< scalar_t(std::size_t, std::size_t)> &A)
bool is_local(int r, int c) const
Definition: DistributedMatrix.hpp:497
DistributedMatrix(const BLACSGrid *g, DenseMatrixWrapper< scalar_t > &&m)
const scalar_t & operator()(int r, int c) const
Definition: DistributedMatrix.hpp:363
int rank(int r, int c) const
Definition: DistributedMatrix.hpp:488
DistributedMatrix(const BLACSGrid *g, int M, int N, int MB, int NB)
int colblocks() const
Definition: DistributedMatrix.hpp:310
DistributedMatrix(const BLACSGrid *g, int M, int N)
scalar_t all_global(int r, int c) const
int coll2g(int col) const
Definition: DistributedMatrix.hpp:435
DistributedMatrix(const BLACSGrid *g, const DenseMatrix< scalar_t > &m)
int nprows() const
Definition: DistributedMatrix.hpp:393
MPI_Comm comm() const
Definition: DistributedMatrix.hpp:248
void global(int r, int c, scalar_t v)
Definition: DistributedMatrix.hpp:578
bool active() const
Definition: DistributedMatrix.hpp:229
int ld() const
Definition: DistributedMatrix.hpp:288
int colg2p(int col) const
Definition: DistributedMatrix.hpp:476
virtual int cols() const
Definition: DistributedMatrix.hpp:273
int ctxt() const
Definition: DistributedMatrix.hpp:254
DistributedMatrix(const BLACSGrid *g, int M, int N, const DistributedMatrix< scalar_t > &m, int context_all)
scalar_t * data()
Definition: DistributedMatrix.hpp:352
int lrows() const
Definition: DistributedMatrix.hpp:278
virtual int J() const
Definition: DistributedMatrix.hpp:325
const scalar_t & global(int r, int c) const
Definition: DistributedMatrix.hpp:551
int rowg2p(int row) const
Definition: DistributedMatrix.hpp:465
virtual void lranges(int &rlo, int &rhi, int &clo, int &chi) const
int colg2l(int col) const
Definition: DistributedMatrix.hpp:455
int rowg2l(int row) const
Definition: DistributedMatrix.hpp:445
const MPIComm & Comm() const
Definition: DistributedMatrix.hpp:242
virtual ~DistributedMatrix()
DistributedMatrix(DistributedMatrix< scalar_t > &&m)
int NB() const
Definition: DistributedMatrix.hpp:299
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:194
MPI_Comm comm() const
Definition: MPIWrapper.hpp:261
class to wrap the C++11 random number generator/distribution
Definition: RandomWrapper.hpp:105
Definition: StrumpackOptions.hpp:43
long long int LU_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1452
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
Definition: DenseMatrix.hpp:1491
UpLo
Definition: DenseMatrix.hpp:83
DenseMatrix< scalar_t > vconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1280
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
long long int orthogonalize_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1513
long long int trsm_flops(Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1483
Side
Definition: DenseMatrix.hpp:74
void copy(std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
Definition: DenseMatrix.hpp:1231
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
long long int ID_row_flops(const DenseMatrix< scalar_t > &a, int rank)
Definition: DenseMatrix.hpp:1475
long long int LQ_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1466
long long int solve_flops(const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1459
Diag
Definition: DenseMatrix.hpp:92