32 #ifndef BLR_MATRIX_MPI_HPP 
   33 #define BLR_MATRIX_MPI_HPP 
   37 #include "BLRTile.hpp" 
   42   template<
typename scalar_t,
typename integer_t> 
class ExtendAdd;
 
   74       const MPIComm& Comm()
 const { 
return comm_; }
 
   75       int nprows()
 const { 
return nprows_; }
 
   76       int npcols()
 const { 
return npcols_; }
 
   77       int prow()
 const { 
return prow_; }
 
   78       int pcol()
 const { 
return pcol_; }
 
   79       int rank()
 const { 
return Comm().
rank(); }
 
   80       int npactives()
 const { 
return nprows()*npcols(); }
 
   81       bool active()
 const { 
return active_; }
 
   83       const MPIComm& row_comm()
 const { 
return rowcomm_; }
 
   84       const MPIComm& col_comm()
 const { 
return colcomm_; }
 
   86       bool is_local_row(
int i)
 const { 
return i % nprows_ == prow_; }
 
   87       bool is_local_col(
int i)
 const { 
return i % npcols_ == pcol_; }
 
   88       bool is_local(
int i, 
int j)
 const 
   89       { 
return is_local_row(i) && is_local_col(j); }
 
   91       int rg2p(
int i)
 const { 
return i % nprows(); }
 
   92       int cg2p(
int j)
 const { 
return j % npcols(); }
 
   93       int g2p(
int i, 
int j)
 const { 
return rg2p(i) + cg2p(j) * nprows(); }
 
   97           std::cout << 
"# ProcessorGrid2D: " 
   98                     << 
"[" << nprows() << 
" x " << npcols() << 
"]" 
  103       bool active_ = 
false;
 
  104       int prow_ = -1, pcol_ = -1;
 
  105       int nprows_ = 0, npcols_ = 0;
 
  106       MPIComm comm_, rowcomm_, colcomm_;
 
  130       using vec_t = std::vector<std::size_t>;
 
  137                    const vec_t& Rt, 
const vec_t& Ct);
 
  139       std::size_t 
rows()
 const override { 
return rows_; }
 
  140       std::size_t 
cols()
 const override { 
return cols_; }
 
  144       std::size_t 
rank() 
const override;
 
  145       std::size_t total_memory() 
const;
 
  146       std::size_t total_nonzeros() 
const;
 
  147       std::size_t max_rank() 
const;
 
  149       const MPIComm& Comm()
 const { 
return grid_->Comm(); }
 
  153       bool active()
 const { 
return grid_->active(); }
 
  155       void fill(scalar_t v);
 
  156       void fill_col(scalar_t v, std::size_t k, std::size_t CP);
 
  158       std::vector<int> 
factor(
const Opts_t& opts);
 
  159       std::vector<int> 
factor(
const adm_t& adm, 
const Opts_t& opts);
 
  160       std::vector<int> factor_col(
const adm_t& adm, 
const Opts_t& opts,
 
  161                                       const std::function<
void(
int, 
bool, std::size_t)>& blockcol);
 
  163       void laswp(
const std::vector<int>& piv, 
bool fwd);
 
  166       std::vector<int> partial_factor(BLRMPI_t& A11, BLRMPI_t& A12,
 
  167                                       BLRMPI_t& A21, BLRMPI_t& A22,
 
  168                                       const adm_t& adm, 
const Opts_t& opts);
 
  171       std::vector<int> partial_factor_col(BLRMPI_t& F11, BLRMPI_t& F12, BLRMPI_t& F21, 
 
  172                                   BLRMPI_t& F22, 
const adm_t& adm, 
const Opts_t& opts, 
 
  173                                   const std::function<
void(
int, 
bool, std::size_t)>& blockcol);
 
  175       void compress(
const Opts_t& opts);
 
  178       BLRMPI_t from_ScaLAPACK(
const DistM_t& A, 
const ProcessorGrid2D& g,
 
  181       BLRMPI_t from_ScaLAPACK(
const DistM_t& A, 
const ProcessorGrid2D& g,
 
  182                               const vec_t& Rt, 
const vec_t& Ct);
 
  183       DistM_t to_ScaLAPACK(
const BLACSGrid* g) 
const;
 
  184       void to_ScaLAPACK(DistM_t& A) 
const;
 
  186       void print(
const std::string& name);
 
  188       std::size_t rowblocks()
 const { 
return brows_; }
 
  189       std::size_t colblocks()
 const { 
return bcols_; }
 
  190       std::size_t rowblockslocal()
 const { 
return lbrows_; }
 
  191       std::size_t colblockslocal()
 const { 
return lbcols_; }
 
  192       std::size_t tilerows(std::size_t i)
 const { 
return roff_[i+1] - roff_[i]; }
 
  193       std::size_t tilecols(std::size_t j)
 const { 
return coff_[j+1] - coff_[j]; }
 
  194       std::size_t tileroff(std::size_t i)
 const { assert(i <= rowblocks()); 
return roff_[i]; }
 
  195       std::size_t tilecoff(std::size_t j)
 const { assert(j <= colblocks()); 
return coff_[j]; }
 
  197       int rg2p(std::size_t i) 
const;
 
  198       int cg2p(std::size_t j) 
const;
 
  199       std::size_t rl2g(std::size_t i) 
const;
 
  200       std::size_t cl2g(std::size_t j) 
const;
 
  201       std::size_t rg2t(std::size_t i) 
const;
 
  202       std::size_t cg2t(std::size_t j) 
const;
 
  204       std::size_t lrows()
 const { 
return lrows_; }
 
  205       std::size_t lcols()
 const { 
return lcols_; }
 
  212       scalar_t& 
operator()(std::size_t i, std::size_t j);
 
  214       scalar_t get_element_and_decompress_HODBF(
int tr, 
int tc, 
int lr, 
int lc);
 
  215       void decompress_local_columns(
int c_min, 
int c_max);
 
  216       void remove_tiles_before_local_column(
int c_min, 
int c_max);
 
  222       const scalar_t& 
global(std::size_t i, std::size_t j) 
const;
 
  223       scalar_t& 
global(std::size_t i, std::size_t j);
 
  226       std::size_t rows_ = 0, cols_ = 0, lrows_ = 0, lcols_ = 0;
 
  227       std::size_t brows_ = 0, bcols_ = 0, lbrows_ = 0, lbcols_ = 0;
 
  229       vec_t rl2t_, cl2t_, rl2l_, cl2l_, rl2g_, cl2g_;
 
  230       std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
 
  233       std::size_t tilerg2l(std::size_t i)
 const {
 
  234         assert(
int(i % grid_->nprows()) == grid_->prow());
 
  235         return i / grid_->nprows();
 
  237       std::size_t tilecg2l(std::size_t j)
 const {
 
  238         assert(
int(j % grid_->npcols()) == grid_->pcol());
 
  239         return j / grid_->npcols();
 
  242       BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) {
 
  243         return ltile(tilerg2l(i), tilecg2l(j));
 
  245       const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j)
 const {
 
  246         return ltile(tilerg2l(i), tilecg2l(j));
 
  248       DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) {
 
  249         return ltile_dense(tilerg2l(i), tilecg2l(j));
 
  251       const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j)
 const {
 
  252         return ltile_dense(tilerg2l(i), tilecg2l(j));
 
  255       BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) {
 
  256         assert(i < rowblockslocal() && j < colblockslocal());
 
  257         return *blocks_[i+j*rowblockslocal()].get();
 
  259       const BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j)
 const {
 
  260         assert(i < rowblockslocal() && j < colblockslocal());
 
  261         return *blocks_[i+j*rowblockslocal()].get();
 
  264       DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) {
 
  265         assert(i < rowblockslocal() && j < colblockslocal());
 
  266         assert(
dynamic_cast<DenseTile<scalar_t>*
> 
  267                (blocks_[i+j*rowblockslocal()].get()));
 
  268         return *
static_cast<DenseTile<scalar_t>*
> 
  269                    (blocks_[i+j*rowblockslocal()].get());
 
  271       const DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j)
 const {
 
  272         assert(i < rowblockslocal() && j < colblockslocal());
 
  273         assert(
dynamic_cast<const DenseTile<scalar_t>*
> 
  274                (blocks_[i+j*rowblockslocal()].get()));
 
  275         return *
static_cast<const DenseTile<scalar_t>*
> 
  276                    (blocks_[i+j*rowblockslocal()].get());
 
  279       std::unique_ptr<BLRTile<scalar_t>>&
 
  280       block(std::size_t i, std::size_t j) {
 
  281         assert(i < rowblocks() && j < colblocks());
 
  282         return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
 
  284       const std::unique_ptr<BLRTile<scalar_t>>&
 
  285       block(std::size_t i, std::size_t j)
 const {
 
  286         assert(i < rowblocks() && j < colblocks());
 
  287         return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
 
  290       std::unique_ptr<BLRTile<scalar_t>>&
 
  291       lblock(std::size_t i, std::size_t j) {
 
  292         assert(i < rowblockslocal() && j < colblockslocal());
 
  293         return blocks_[i+j*rowblockslocal()];
 
  295       const std::unique_ptr<BLRTile<scalar_t>>&
 
  296       lblock(std::size_t i, std::size_t j)
 const {
 
  297         assert(i < rowblockslocal() && j < colblockslocal());
 
  298         return blocks_[i+j*rowblockslocal()];
 
  301       void compress_tile(std::size_t i, std::size_t j, 
const Opts_t& opts);
 
  304       bcast_dense_tile_along_col(std::size_t i, std::size_t j) 
const;
 
  306       bcast_dense_tile_along_row(std::size_t i, std::size_t j) 
const;
 
  308       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  309       bcast_row_of_tiles_along_cols(std::size_t i,
 
  310                                     std::size_t j0, std::size_t j1) 
const;
 
  311       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  312       bcast_col_of_tiles_along_rows(std::size_t i0, std::size_t i1,
 
  313                                     std::size_t j) 
const;
 
  315       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  316       gather_rows(std::size_t i0, std::size_t i1,
 
  317                   std::size_t j0, std::size_t j1) 
const;
 
  319       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  320       gather_cols(std::size_t i0, std::size_t i1,
 
  321                   std::size_t j0, std::size_t j1) 
const;
 
  323       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  324       gather_row(std::size_t i0, std::size_t k,
 
  325                  std::size_t j0, std::size_t j1) 
const;
 
  327       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  328       gather_col(std::size_t i0, std::size_t i1,
 
  329                  std::size_t j0, std::size_t k) 
const;
 
  331       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  332       gather_rows_A22(std::size_t i1, std::size_t j1) 
const;
 
  334       std::vector<std::unique_ptr<BLRTile<scalar_t>>>
 
  335       gather_cols_A22(std::size_t i1, std::size_t j1) 
const;
 
  337       template<
typename T> 
friend void 
  340       template<
typename T> 
friend void 
  341       gemv(
Trans ta, T alpha, 
const BLRMatrixMPI<T>& a,
 
  342            const BLRMatrixMPI<T>& x, T beta, BLRMatrixMPI<T>& y);
 
  343       template<
typename T> 
friend void 
  345            const BLRMatrixMPI<T>& a, BLRMatrixMPI<T>& b);
 
  346       template<
typename T> 
friend void 
  347       gemm(
Trans ta, 
Trans tb, T alpha, 
const BLRMatrixMPI<T>& a,
 
  348            const BLRMatrixMPI<T>& b, T beta, BLRMatrixMPI<T>& c);
 
  351       template<
typename T,
typename I> 
friend class BLRExtendAdd;
 
  354     template<
typename scalar_t> 
void 
  355     LUAR(std::size_t kmax, std::size_t lk,
 
  356          std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
 
  357          std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
 
  358          DenseMatrix<scalar_t>& tij, 
const BLROptions<scalar_t>& opts,
 
  361     template<
typename scalar_t> 
void 
  362     LUAR_A22(std::size_t kmax, std::size_t lj, std::size_t lk,
 
  363              std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
 
  364              std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
 
  365              DenseMatrix<scalar_t>& tij, 
const BLROptions<scalar_t>& opts,
 
  368     template<
typename scalar_t> 
void 
  370          BLRMatrixMPI<scalar_t>& b);
 
  371     template<
typename scalar_t> 
void 
  372     gemv(
Trans ta, scalar_t alpha, 
const BLRMatrixMPI<scalar_t>& a,
 
  373          const BLRMatrixMPI<scalar_t>& x, scalar_t beta,
 
  374          BLRMatrixMPI<scalar_t>& y);
 
  376     template<
typename scalar_t> 
void 
  378          scalar_t alpha, 
const BLRMatrixMPI<scalar_t>& a,
 
  379          BLRMatrixMPI<scalar_t>& b);
 
  380     template<
typename scalar_t> 
void 
  381     gemm(
Trans ta, 
Trans tb, scalar_t alpha, 
const BLRMatrixMPI<scalar_t>& a,
 
  382          const BLRMatrixMPI<scalar_t>& b, scalar_t beta,
 
  383          BLRMatrixMPI<scalar_t>& c);
 
Contains the BLRMatrix class.
Contains the DistributedMatrix and DistributedMatrixWrapper classes, wrappers around ScaLAPACK/PBLAS ...
Distributed memory block low rank matrix.
Definition: BLRMatrixMPI.hpp:124
scalar_t operator()(std::size_t i, std::size_t j) const
std::size_t rank() const override
std::size_t rows() const override
Definition: BLRMatrixMPI.hpp:139
const scalar_t & global(std::size_t i, std::size_t j) const
std::size_t cols() const override
Definition: BLRMatrixMPI.hpp:140
std::size_t memory() const override
std::size_t nonzeros() const override
Class containing several options for the BLR code and data-structures.
Definition: BLROptions.hpp:82
Representation of a 2D processor grid, similar to a BLACS grid, but not requiring ScaLAPACK.
Definition: BLRMatrixMPI.hpp:56
ProcessorGrid2D(const MPIComm &comm)
ProcessorGrid2D(const MPIComm &comm, int P)
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:733
2D block cyclicly distributed matrix, as used by ScaLAPACK.
Definition: DistributedMatrix.hpp:84
Definition: BLRMatrixMPI.hpp:42
Definition: BLRMatrixMPI.hpp:43
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:194
bool is_root() const
Definition: MPIWrapper.hpp:293
int rank() const
Definition: MPIWrapper.hpp:271
Class to represent a structured matrix. This is the abstract base class for several types of structur...
Definition: StructuredMatrix.hpp:209
Definition: StrumpackOptions.hpp:43
UpLo
Definition: DenseMatrix.hpp:83
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
Side
Definition: DenseMatrix.hpp:74
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
Diag
Definition: DenseMatrix.hpp:92