Loading...
Searching...
No Matches
DistributedMatrix.hpp
Go to the documentation of this file.
1/*
2 * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3 * Regents of the University of California, through Lawrence Berkeley
4 * National Laboratory (subject to receipt of any required approvals
5 * from the U.S. Dept. of Energy). All rights reserved.
6 *
7 * If you have questions about your rights to use or distribute this
8 * software, please contact Berkeley Lab's Technology Transfer
9 * Department at TTD@lbl.gov.
10 *
11 * NOTICE. This software is owned by the U.S. Department of Energy. As
12 * such, the U.S. Government has been granted for itself and others
13 * acting on its behalf a paid-up, nonexclusive, irrevocable,
14 * worldwide license in the Software to reproduce, prepare derivative
15 * works, and perform publicly and display publicly. Beginning five
16 * (5) years after the date permission to assert copyright is obtained
17 * from the U.S. Department of Energy, and subject to any subsequent
18 * five (5) year renewals, the U.S. Government is granted for itself
19 * and others acting on its behalf a paid-up, nonexclusive,
20 * irrevocable, worldwide license in the Software to reproduce,
21 * prepare derivative works, distribute copies to the public, perform
22 * publicly and display publicly, and to permit others to do so.
23 *
24 * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25 * (Lawrence Berkeley National Lab, Computational Research
26 * Division).
27 */
34#ifndef DISTRIBUTED_MATRIX_HPP
35#define DISTRIBUTED_MATRIX_HPP
36
#include <cstddef>
#include <vector>
#include <string>
#include <cmath>
#include <functional>

#include "misc/MPIWrapper.hpp"
#include "misc/RandomWrapper.hpp"
#include "DenseMatrix.hpp"
#include "BLACSGrid.hpp"
46
47namespace strumpack {
48
49#ifndef DOXYGEN_SHOULD_SKIP_THIS
/**
 * Map a local index to a global index for a 1D block distribution
 * with block size NB over NPROCS processes.
 *
 * Both the input and the result use 1-based numbering: the formula
 * subtracts 1 from INDXLOC on entry and adds 1 to the result.
 *
 * \param INDXLOC  1-based local index
 * \param NB       block size
 * \param IPROC    coordinate of the process owning the local index
 * \param ISRCPROC coordinate of the process holding the first block
 * \param NPROCS   number of processes over which the index is spread
 * \return the 1-based global index
 */
inline int indxl2g(int INDXLOC, int NB,
                   int IPROC, int ISRCPROC, int NPROCS) {
  const int loc0 = INDXLOC - 1;
  // distance (in processes) from the source process, wrapped to [0,NPROCS)
  const int proc_shift = (NPROCS + IPROC - ISRCPROC) % NPROCS;
  return NPROCS*NB*(loc0/NB) + loc0 % NB + proc_shift*NB + 1;
}
/**
 * Map a 1-based global index to the 1-based local index it has on
 * the process that owns it, for block size NB over NPROCS processes.
 *
 * IPROC and ISRCPROC are accepted but do not enter the computation.
 *
 * \param INDXGLOB 1-based global index
 * \param NB       block size
 * \param IPROC    unused
 * \param ISRCPROC unused
 * \param NPROCS   number of processes over which the index is spread
 * \return the 1-based local index
 */
inline int indxg2l(int INDXGLOB, int NB,
                   int IPROC, int ISRCPROC, int NPROCS) {
  const int glob0 = INDXGLOB - 1;
  const int full_cycles = glob0 / (NB*NPROCS);
  return NB*full_cycles + glob0 % NB + 1;
}
/**
 * Coordinate of the process that owns a given 1-based global index,
 * for block size NB over NPROCS processes.
 *
 * IPROC is accepted but does not enter the computation.
 *
 * \param INDXGLOB 1-based global index
 * \param NB       block size
 * \param IPROC    unused
 * \param ISRCPROC coordinate of the process holding the first block
 * \param NPROCS   number of processes over which the index is spread
 * \return process coordinate in [0,NPROCS) for non-negative inputs
 */
inline int indxg2p(int INDXGLOB, int NB,
                   int IPROC, int ISRCPROC, int NPROCS) {
  const int block = (INDXGLOB - 1) / NB;
  return (ISRCPROC + block) % NPROCS;
}
63#endif
64
84 template<typename scalar_t> class DistributedMatrix {
85 using real_t = typename RealType<scalar_t>::value_type;
86
87 public:
88
93
105 DistributedMatrix(const BLACSGrid* g, int M, int N);
106
119 DistributedMatrix(const BLACSGrid* g, int M, int N,
120 const std::function<scalar_t(std::size_t,
121 std::size_t)>& A);
122
133
143
154
171 DistributedMatrix(const BLACSGrid* g, int M, int N,
173 int context_all);
174
183 DistributedMatrix(const BLACSGrid* g, int M, int N, int MB, int NB);
184
185
190
195
200
207
214
215
220 const int* desc() const { return desc_; }
221
229 bool active() const { return grid() && grid()->active(); }
230
235 const BLACSGrid* grid() const { return grid_; }
236
242 const MPIComm& Comm() const { return grid()->Comm(); }
243
248 MPI_Comm comm() const { return Comm().comm(); }
249
254 int ctxt() const { return grid() ? grid()->ctxt() : -1; }
255
262 int ctxt_all() const { return grid() ? grid()->ctxt_all() : -1; }
263
268 virtual int rows() const { return desc_[2]; }
273 virtual int cols() const { return desc_[3]; }
278 int lrows() const { return lrows_; }
283 int lcols() const { return lcols_; }
288 int ld() const { return lrows_; }
289
294 int MB() const { return desc_[4]; }
299 int NB() const { return desc_[5]; }
300
305 int rowblocks() const { return std::ceil(float(lrows()) / MB()); }
310 int colblocks() const { return std::ceil(float(lcols()) / NB()); }
311
318 virtual int I() const { return 1; }
325 virtual int J() const { return 1; }
326
340 virtual void lranges(int& rlo, int& rhi, int& clo, int& chi) const;
341
346 const scalar_t* data() const { return data_; }
347
352 scalar_t* data() { return data_; }
353
363 const scalar_t& operator()(int r, int c) const { return data_[r+ld()*c]; }
364
374 scalar_t& operator()(int r, int c) { return data_[r+ld()*c]; }
375
381 int prow() const { assert(grid()); return grid()->prow(); }
387 int pcol() const { assert(grid()); return grid()->pcol(); }
393 int nprows() const { assert(grid()); return grid()->nprows(); }
399 int npcols() const { assert(grid()); return grid()->npcols(); }
406 int npactives() const {
407 assert(grid());
408 return grid()->npactives();
409 }
410
416 bool is_master() const {
417 return grid() && prow() == 0 && pcol() == 0;
418 }
425 int rowl2g(int row) const {
426 assert(grid());
427 return indxl2g(row+1, MB(), prow(), 0, nprows()) - I();
428 }
435 int coll2g(int col) const {
436 assert(grid());
437 return indxl2g(col+1, NB(), pcol(), 0, npcols()) - J();
438 }
445 int rowg2l(int row) const {
446 assert(grid());
447 return indxg2l(row+I(), MB(), prow(), 0, nprows()) - 1;
448 }
455 int colg2l(int col) const {
456 assert(grid());
457 return indxg2l(col+J(), NB(), pcol(), 0, npcols()) - 1;
458 }
465 int rowg2p(int row) const {
466 assert(grid());
467 return indxg2p(row+I(), MB(), prow(), 0, nprows());
468 }
476 int colg2p(int col) const {
477 assert(grid());
478 return indxg2p(col+J(), NB(), pcol(), 0, npcols());
479 }
488 int rank(int r, int c) const {
489 return rowg2p(r) + colg2p(c) * nprows();
490 }
497 bool is_local(int r, int c) const {
498 assert(grid());
499 return rowg2p(r) == prow() && colg2p(c) == pcol();
500 }
501
502#ifndef DOXYGEN_SHOULD_SKIP_THIS
503 bool fixed() const { return MB()==default_MB && NB()==default_NB; }
504 int rowl2g_fixed(int row) const {
505 assert(grid() && fixed());
506 return indxl2g(row+1, default_MB, prow(), 0, nprows()) - I();
507 }
508 int coll2g_fixed(int col) const {
509 assert(grid() && fixed());
510 return indxl2g(col+1, default_NB, pcol(), 0, npcols()) - J();
511 }
512 int rowg2l_fixed(int row) const {
513 assert(grid() && fixed());
514 return indxg2l(row+I(), default_MB, prow(), 0, nprows()) - 1;
515 }
516 int colg2l_fixed(int col) const {
517 assert(grid() && fixed());
518 return indxg2l(col+J(), default_NB, pcol(), 0, npcols()) - 1;
519 }
520 int rowg2p_fixed(int row) const {
521 assert(grid() && fixed());
522 return indxg2p(row+I(), default_MB, prow(), 0, nprows());
523 }
524 int colg2p_fixed(int col) const {
525 assert(grid() && fixed());
526 return indxg2p(col+J(), default_NB, pcol(), 0, npcols());
527 }
528 int rank_fixed(int r, int c) const {
529 assert(grid() && fixed()); return rowg2p_fixed(r) + colg2p_fixed(c) * nprows();
530 }
531 bool is_local_fixed(int r, int c) const {
532 assert(grid() && fixed());
533 return rowg2p_fixed(r) == prow() && colg2p_fixed(c) == pcol();
534 }
535 scalar_t& global_fixed(int r, int c) {
536 assert(is_local(r, c)); assert(fixed());
537 return operator()(rowg2l_fixed(r),colg2l_fixed(c));
538 }
539#endif
540
551 const scalar_t& global(int r, int c) const {
552 assert(is_local(r, c));
553 return operator()(rowg2l(r),colg2l(c));
554 }
565 scalar_t& global(int r, int c) {
566 assert(is_local(r, c)); return operator()(rowg2l(r),colg2l(c));
567 }
578 void global(int r, int c, scalar_t v) {
579 if (active() && is_local(r, c)) operator()(rowg2l(r),colg2l(c)) = v;
580 }
592 scalar_t all_global(int r, int c) const;
593
594 void print() const { print("A"); }
595 void print(std::string name, int precision=15) const;
596 void print_to_file(std::string name, std::string filename,
597 int width=8) const;
598 void print_to_files(std::string name, int precision=16) const;
599 void random();
600 void random(random::RandomGeneratorBase<typename RealType<scalar_t>::
601 value_type>& rgen);
602 void zero();
603 void fill(scalar_t a);
604 void fill(const std::function<scalar_t(std::size_t,
605 std::size_t)>& A);
606 void eye();
607 void shift(scalar_t sigma);
608 scalar_t trace() const;
609 void clear();
610 virtual void resize(std::size_t m, std::size_t n);
611 virtual void hconcat(const DistributedMatrix<scalar_t>& b);
612 DistributedMatrix<scalar_t> transpose() const;
613
614 void mult(Trans op, const DistributedMatrix<scalar_t>& X,
616
617 void laswp(const std::vector<int>& P, bool fwd);
618
620 extract_rows(const std::vector<std::size_t>& Ir) const;
622 extract_cols(const std::vector<std::size_t>& Ic) const;
623
625 extract(const std::vector<std::size_t>& I,
626 const std::vector<std::size_t>& J) const;
629 scaled_add(scalar_t alpha, const DistributedMatrix<scalar_t>& B);
631 scale_and_add(scalar_t alpha, const DistributedMatrix<scalar_t>& B);
632
633 real_t norm() const;
634 real_t normF() const;
635 real_t norm1() const;
636 real_t normI() const;
637
638 virtual std::size_t memory() const {
639 return sizeof(scalar_t)*std::size_t(lrows())*std::size_t(lcols());
640 }
641 virtual std::size_t total_memory() const {
642 return sizeof(scalar_t)*std::size_t(rows())*std::size_t(cols());
643 }
644 virtual std::size_t nonzeros() const {
645 return std::size_t(lrows())*std::size_t(lcols());
646 }
647 virtual std::size_t total_nonzeros() const {
648 return std::size_t(rows())*std::size_t(cols());
649 }
650
651 void scatter(const DenseMatrix<scalar_t>& a);
652 DenseMatrix<scalar_t> gather() const;
653 DenseMatrix<scalar_t> all_gather() const;
654
655 DenseMatrix<scalar_t> dense_and_clear();
656 DenseMatrix<scalar_t> dense() const;
657 DenseMatrixWrapper<scalar_t> dense_wrapper();
658
659 std::vector<int> LU();
660 int LU(std::vector<int>&);
661
662 DistributedMatrix<scalar_t>
663 solve(const DistributedMatrix<scalar_t>& b,
664 const std::vector<int>& piv) const;
665
666 void LQ(DistributedMatrix<scalar_t>& L,
667 DistributedMatrix<scalar_t>& Q) const;
668
669 void orthogonalize(scalar_t& r_max, scalar_t& r_min);
670
671 void ID_column(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
672 std::vector<std::size_t>& ind,
673 real_t rel_tol, real_t abs_tol, int max_rank);
674 void ID_row(DistributedMatrix<scalar_t>& X, std::vector<int>& piv,
675 std::vector<std::size_t>& ind, real_t rel_tol, real_t abs_tol,
676 int max_rank, const BLACSGrid* grid_T);
677
678
679 std::size_t subnormals() const;
680 std::size_t zeros() const;
681
694 static const int default_MB = STRUMPACK_PBLAS_BLOCKSIZE;
701 static const int default_NB = STRUMPACK_PBLAS_BLOCKSIZE;
702
703 protected:
704 const BLACSGrid* grid_ = nullptr;
705 scalar_t* data_ = nullptr;
706 int lrows_;
707 int lcols_;
708 int desc_[9];
709 };
710
715 template<typename scalar_t> void
716 copy(std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& a,
717 std::size_t ia, std::size_t ja, DenseMatrix<scalar_t>& b,
718 int dest, int context_all);
719
720 template<typename scalar_t> void
721 copy(std::size_t m, std::size_t n, const DenseMatrix<scalar_t>& a, int src,
722 DistributedMatrix<scalar_t>& b, std::size_t ib, std::size_t jb,
723 int context_all);
724
726 template<typename scalar_t> void
727 copy(std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& a,
728 std::size_t ia, std::size_t ja, DistributedMatrix<scalar_t>& b,
729 std::size_t ib, std::size_t jb, int context_all);
730
736 template<typename scalar_t>
738 private:
739 int _rows, _cols, _i, _j;
740 public:
742 _rows(0), _cols(0), _i(0), _j(0) {}
743
747 DistributedMatrixWrapper(std::size_t m, std::size_t n,
749 std::size_t i, std::size_t j);
750 DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
751 scalar_t* A);
752 DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
753 int MB, int NB, scalar_t* A);
754 DistributedMatrixWrapper(const BLACSGrid* g, std::size_t m, std::size_t n,
756
757 virtual ~DistributedMatrixWrapper() { this->data_ = nullptr; }
758
760 operator=(const DistributedMatrixWrapper<scalar_t>& A);
763
764 int rows() const override { return _rows; }
765 int cols() const override { return _cols; }
766 int I() const override { return _i+1; }
767 int J() const override { return _j+1; }
768 void lranges(int& rlo, int& rhi, int& clo, int& chi) const override;
769
770 void resize(std::size_t m, std::size_t n) override { assert(1); }
771 void hconcat(const DistributedMatrix<scalar_t>& b) override { assert(1); }
772 void clear()
773 { this->data_ = nullptr; DistributedMatrix<scalar_t>::clear(); }
774 std::size_t memory() const override { return 0; }
775 std::size_t total_memory() const override { return 0; }
776 std::size_t nonzeros() const override { return 0; }
777 std::size_t total_nonzeros() const override { return 0; }
778
779 DenseMatrix<scalar_t> dense_and_clear() = delete;
780 DenseMatrixWrapper<scalar_t> dense_wrapper() = delete;
781 DistributedMatrixWrapper<scalar_t>&
782 operator=(const DistributedMatrix<scalar_t>&) = delete;
783 DistributedMatrixWrapper<scalar_t>&
784 operator=(DistributedMatrix<scalar_t>&&) = delete;
785 };
786
787
788 template<typename scalar_t> long long int
789 LU_flops(const DistributedMatrix<scalar_t>& a) {
790 if (!a.active()) return 0;
791 return (is_complex<scalar_t>() ? 4:1) *
792 blas::getrf_flops(a.rows(), a.cols()) /
793 a.npactives();
794 }
795
796 template<typename scalar_t> long long int
797 solve_flops(const DistributedMatrix<scalar_t>& b) {
798 if (!b.active()) return 0;
799 return (is_complex<scalar_t>() ? 4:1) *
800 blas::getrs_flops(b.rows(), b.cols()) /
801 b.npactives();
802 }
803
804 template<typename scalar_t> long long int
805 LQ_flops(const DistributedMatrix<scalar_t>& a) {
806 if (!a.active()) return 0;
807 auto minrc = std::min(a.rows(), a.cols());
808 return (is_complex<scalar_t>() ? 4:1) *
809 (blas::gelqf_flops(a.rows(), a.cols()) +
810 blas::xxglq_flops(a.cols(), a.cols(), minrc)) /
811 a.npactives();
812 }
813
814 template<typename scalar_t> long long int
815 ID_row_flops(const DistributedMatrix<scalar_t>& a, int rank) {
816 if (!a.active()) return 0;
817 return (is_complex<scalar_t>() ? 4:1) *
818 (blas::geqp3_flops(a.cols(), a.rows())
819 + blas::trsm_flops(rank, a.cols() - rank, scalar_t(1.), 'L')) /
820 a.npactives();
821 }
822
823 template<typename scalar_t> long long int
824 trsm_flops(Side s, scalar_t alpha, const DistributedMatrix<scalar_t>& a,
825 const DistributedMatrix<scalar_t>& b) {
826 if (!a.active()) return 0;
827 return (is_complex<scalar_t>() ? 4:1) *
828 blas::trsm_flops(b.rows(), b.cols(), alpha, char(s)) /
829 a.npactives();
830 }
831
832 template<typename scalar_t> long long int
833 gemm_flops(Trans ta, Trans tb, scalar_t alpha,
834 const DistributedMatrix<scalar_t>& a,
835 const DistributedMatrix<scalar_t>& b, scalar_t beta) {
836 if (!a.active()) return 0;
837 return (is_complex<scalar_t>() ? 4:1) *
838 blas::gemm_flops
839 ((ta==Trans::N) ? a.rows() : a.cols(),
840 (tb==Trans::N) ? b.cols() : b.rows(),
841 (ta==Trans::N) ? a.cols() : a.rows(), alpha, beta) /
842 a.npactives();
843 }
844
845 template<typename scalar_t> long long int
846 gemv_flops(Trans ta, const DistributedMatrix<scalar_t>& a,
847 scalar_t alpha, scalar_t beta) {
848 if (!a.active()) return 0;
849 auto m = (ta==Trans::N) ? a.rows() : a.cols();
850 auto n = (ta==Trans::N) ? a.cols() : a.rows();
851 return (is_complex<scalar_t>() ? 4:1) *
852 ((alpha != scalar_t(0.)) * m * (n * 2 - 1) +
853 (alpha != scalar_t(1.) && alpha != scalar_t(0.)) * m +
854 (beta != scalar_t(0.) && beta != scalar_t(1.)) * m +
855 (alpha != scalar_t(0.) && beta != scalar_t(0.)) * m) /
856 a.npactives();
857 }
858
859 template<typename scalar_t> long long int
860 orthogonalize_flops(const DistributedMatrix<scalar_t>& a) {
861 if (!a.active()) return 0;
862 auto minrc = std::min(a.rows(), a.cols());
863 return (is_complex<scalar_t>() ? 4:1) *
864 (blas::geqrf_flops(a.rows(), minrc) +
865 blas::xxgqr_flops(a.rows(), minrc, minrc)) /
866 a.npactives();
867 }
868
869
870 template<typename scalar_t>
871 std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
872 ConstDistributedMatrixWrapperPtr
873 (std::size_t m, std::size_t n, const DistributedMatrix<scalar_t>& D,
874 std::size_t i, std::size_t j) {
875 return std::unique_ptr<const DistributedMatrixWrapper<scalar_t>>
876 (new DistributedMatrixWrapper<scalar_t>
877 (m, n, const_cast<DistributedMatrix<scalar_t>&>(D), i, j));
878 }
879
880
881 template<typename scalar_t> void gemm
882 (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix<scalar_t>& A,
883 const DistributedMatrix<scalar_t>& B, scalar_t beta,
884 DistributedMatrix<scalar_t>& C);
885
886 template<typename scalar_t> void trsm
887 (Side s, UpLo u, Trans ta, Diag d, scalar_t alpha,
888 const DistributedMatrix<scalar_t>& A, DistributedMatrix<scalar_t>& B);
889
890 template<typename scalar_t> void trsv
891 (UpLo ul, Trans ta, Diag d, const DistributedMatrix<scalar_t>& A,
892 DistributedMatrix<scalar_t>& B);
893
894 template<typename scalar_t> void gemv
895 (Trans ta, scalar_t alpha, const DistributedMatrix<scalar_t>& A,
896 const DistributedMatrix<scalar_t>& X, scalar_t beta,
897 DistributedMatrix<scalar_t>& Y);
898
899 template<typename scalar_t> DistributedMatrix<scalar_t> vconcat
900 (int cols, int arows, int brows, const DistributedMatrix<scalar_t>& a,
901 const DistributedMatrix<scalar_t>& b, const BLACSGrid* gnew, int cxt_all);
902
903 template<typename scalar_t> void subgrid_copy_to_buffers
904 (const DistributedMatrix<scalar_t>& a, const DistributedMatrix<scalar_t>& b,
905 int p0, int npr, int npc, std::vector<std::vector<scalar_t>>& sbuf);
906
907 template<typename scalar_t> void subproc_copy_to_buffers
908 (const DenseMatrix<scalar_t>& a, const DistributedMatrix<scalar_t>& b,
909 int p0, int npr, int npc, std::vector<std::vector<scalar_t>>& sbuf);
910
911 template<typename scalar_t> void subgrid_add_from_buffers
912 (const BLACSGrid* subg, int master, DistributedMatrix<scalar_t>& b,
913 std::vector<scalar_t*>& pbuf);
914
915} // end namespace strumpack
916
917#endif // DISTRIBUTED_MATRIX_HPP
Contains a wrapper class around a BLACS grid/context.
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
Contains some simple C++ MPI wrapper utilities.
This is a small wrapper class around a BLACS grid and a BLACS context.
Definition BLACSGrid.hpp:66
bool active() const
Definition BLACSGrid.hpp:264
const MPIComm & Comm() const
Definition BLACSGrid.hpp:196
int npcols() const
Definition BLACSGrid.hpp:232
int ctxt_all() const
Definition BLACSGrid.hpp:220
int npactives() const
Definition BLACSGrid.hpp:257
int pcol() const
Definition BLACSGrid.hpp:244
int nprows() const
Definition BLACSGrid.hpp:226
int ctxt() const
Definition BLACSGrid.hpp:209
int prow() const
Definition BLACSGrid.hpp:238
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition DenseMatrix.hpp:1018
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition DenseMatrix.hpp:139
Definition DistributedMatrix.hpp:737
int J() const override
Definition DistributedMatrix.hpp:767
int I() const override
Definition DistributedMatrix.hpp:766
int rows() const override
Definition DistributedMatrix.hpp:764
void lranges(int &rlo, int &rhi, int &clo, int &chi) const override
int cols() const override
Definition DistributedMatrix.hpp:765
2D block cyclically distributed matrix, as used by ScaLAPACK.
Definition DistributedMatrix.hpp:84
int prow() const
Definition DistributedMatrix.hpp:381
DistributedMatrix(const BLACSGrid *g, DenseMatrix< scalar_t > &&m)
DistributedMatrix(const DistributedMatrix< scalar_t > &m)
int ctxt_all() const
Definition DistributedMatrix.hpp:262
int lcols() const
Definition DistributedMatrix.hpp:283
virtual int I() const
Definition DistributedMatrix.hpp:318
int rowblocks() const
Definition DistributedMatrix.hpp:305
bool is_master() const
Definition DistributedMatrix.hpp:416
const MPIComm & Comm() const
Definition DistributedMatrix.hpp:242
DistributedMatrix< scalar_t > & operator=(const DistributedMatrix< scalar_t > &m)
int npactives() const
Definition DistributedMatrix.hpp:406
int MB() const
Definition DistributedMatrix.hpp:294
int rowl2g(int row) const
Definition DistributedMatrix.hpp:425
static const int default_MB
Definition DistributedMatrix.hpp:694
int pcol() const
Definition DistributedMatrix.hpp:387
virtual int rows() const
Definition DistributedMatrix.hpp:268
static const int default_NB
Definition DistributedMatrix.hpp:701
int npcols() const
Definition DistributedMatrix.hpp:399
const scalar_t * data() const
Definition DistributedMatrix.hpp:346
DistributedMatrix< scalar_t > & operator=(DistributedMatrix< scalar_t > &&m)
DistributedMatrix(const BLACSGrid *g, int M, int N, const std::function< scalar_t(std::size_t, std::size_t)> &A)
bool is_local(int r, int c) const
Definition DistributedMatrix.hpp:497
DistributedMatrix(const BLACSGrid *g, DenseMatrixWrapper< scalar_t > &&m)
int rank(int r, int c) const
Definition DistributedMatrix.hpp:488
DistributedMatrix(const BLACSGrid *g, int M, int N, int MB, int NB)
int colblocks() const
Definition DistributedMatrix.hpp:310
DistributedMatrix(const BLACSGrid *g, int M, int N)
scalar_t all_global(int r, int c) const
int coll2g(int col) const
Definition DistributedMatrix.hpp:435
DistributedMatrix(const BLACSGrid *g, const DenseMatrix< scalar_t > &m)
int nprows() const
Definition DistributedMatrix.hpp:393
MPI_Comm comm() const
Definition DistributedMatrix.hpp:248
void global(int r, int c, scalar_t v)
Definition DistributedMatrix.hpp:578
bool active() const
Definition DistributedMatrix.hpp:229
int ld() const
Definition DistributedMatrix.hpp:288
scalar_t * data()
Definition DistributedMatrix.hpp:352
int colg2p(int col) const
Definition DistributedMatrix.hpp:476
virtual int cols() const
Definition DistributedMatrix.hpp:273
scalar_t & operator()(int r, int c)
Definition DistributedMatrix.hpp:374
int ctxt() const
Definition DistributedMatrix.hpp:254
DistributedMatrix(const BLACSGrid *g, int M, int N, const DistributedMatrix< scalar_t > &m, int context_all)
int lrows() const
Definition DistributedMatrix.hpp:278
scalar_t & global(int r, int c)
Definition DistributedMatrix.hpp:565
virtual int J() const
Definition DistributedMatrix.hpp:325
const int * desc() const
Definition DistributedMatrix.hpp:220
int rowg2p(int row) const
Definition DistributedMatrix.hpp:465
virtual void lranges(int &rlo, int &rhi, int &clo, int &chi) const
int colg2l(int col) const
Definition DistributedMatrix.hpp:455
int rowg2l(int row) const
Definition DistributedMatrix.hpp:445
DistributedMatrix(DistributedMatrix< scalar_t > &&m)
int NB() const
Definition DistributedMatrix.hpp:299
const scalar_t & global(int r, int c) const
Definition DistributedMatrix.hpp:551
const scalar_t & operator()(int r, int c) const
Definition DistributedMatrix.hpp:363
const BLACSGrid * grid() const
Definition DistributedMatrix.hpp:235
Wrapper class around an MPI_Comm object.
Definition MPIWrapper.hpp:173
MPI_Comm comm() const
Definition MPIWrapper.hpp:240
class to wrap the C++11 random number generator/distribution
Definition RandomWrapper.hpp:105
Definition StrumpackOptions.hpp:44
long long int LU_flops(const DenseMatrix< scalar_t > &a)
Definition DenseMatrix.hpp:1459
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
Definition DenseMatrix.hpp:1498
UpLo
Definition DenseMatrix.hpp:83
DenseMatrix< scalar_t > vconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition DenseMatrix.hpp:1287
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
long long int orthogonalize_flops(const DenseMatrix< scalar_t > &a)
Definition DenseMatrix.hpp:1520
long long int trsm_flops(Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition DenseMatrix.hpp:1490
Side
Definition DenseMatrix.hpp:74
void copy(std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
Definition DenseMatrix.hpp:1238
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
long long int ID_row_flops(const DenseMatrix< scalar_t > &a, int rank)
Definition DenseMatrix.hpp:1482
long long int LQ_flops(const DenseMatrix< scalar_t > &a)
Definition DenseMatrix.hpp:1473
long long int solve_flops(const DenseMatrix< scalar_t > &b)
Definition DenseMatrix.hpp:1466
Diag
Definition DenseMatrix.hpp:93