BLRMatrixMPI.hpp
Go to the documentation of this file.
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  *
28  */
32 #ifndef BLR_MATRIX_MPI_HPP
33 #define BLR_MATRIX_MPI_HPP
34 
35 #include "dense/DistributedMatrix.hpp"
36 #include "BLRMatrix.hpp"
37 #include "BLRTile.hpp"
38 
39 namespace strumpack {
40 
41  // forward declaration
42  template<typename scalar_t,typename integer_t> class ExtendAdd;
43  template<typename scalar_t,typename integer_t> class FrontalMatrixBLRMPI;
44 
45  namespace BLR {
46 
47  // forward declaration
48  //template<typename scalar_t,typename integer_t> class BLRExtendAdd;
49 
57  public:
64  ProcessorGrid2D(const MPIComm& comm);
72  ProcessorGrid2D(const MPIComm& comm, int P);
73 
74  const MPIComm& Comm() const { return comm_; }
75  int nprows() const { return nprows_; }
76  int npcols() const { return npcols_; }
77  int prow() const { return prow_; }
78  int pcol() const { return pcol_; }
79  int rank() const { return Comm().rank(); }
80  int npactives() const { return nprows()*npcols(); }
81  bool active() const { return active_; }
82 
83  const MPIComm& row_comm() const { return rowcomm_; }
84  const MPIComm& col_comm() const { return colcomm_; }
85 
86  bool is_local_row(int i) const { return i % nprows_ == prow_; }
87  bool is_local_col(int i) const { return i % npcols_ == pcol_; }
88  bool is_local(int i, int j) const
89  { return is_local_row(i) & is_local_col(j); }
90 
91  int rg2p(int i) const { return i % nprows(); }
92  int cg2p(int j) const { return j % npcols(); }
93  int g2p(int i, int j) const { return rg2p(i) + cg2p(j) * nprows(); }
94 
95  void print() const {
96  if (comm_.is_root())
97  std::cout << "# ProcessorGrid2D: "
98  << "[" << nprows() << " x " << npcols() << "]"
99  << std::endl;
100  }
101 
102  private:
103  bool active_ = false;
104  int prow_ = -1, pcol_ = -1;
105  int nprows_ = 0, npcols_ = 0;
106  MPIComm comm_, rowcomm_, colcomm_;
107  };
108 
109 
123  template<typename scalar_t> class BLRMatrixMPI
124  : public structured::StructuredMatrix<scalar_t> {
130  using vec_t = std::vector<std::size_t>;
131  using adm_t = DenseMatrix<bool>;
133 
134  public:
135  BLRMatrixMPI();
136  BLRMatrixMPI(const ProcessorGrid2D& grid,
137  const vec_t& Rt, const vec_t& Ct);
138 
139  std::size_t rows() const override { return rows_; }
140  std::size_t cols() const override { return cols_; }
141 
142  std::size_t memory() const override;
143  std::size_t nonzeros() const override;
144  std::size_t rank() const override;
145  std::size_t total_memory() const;
146  std::size_t total_nonzeros() const;
147  std::size_t max_rank() const;
148 
149  const MPIComm& Comm() const { return grid_->Comm(); }
150 
151  const ProcessorGrid2D* grid() const { return grid_; }
152 
153  bool active() const { return grid_->active(); }
154 
155  void fill(scalar_t v);
156  void fill_col(scalar_t v, std::size_t k, std::size_t CP);
157 
158  std::vector<int> factor(const Opts_t& opts);
159  std::vector<int> factor(const adm_t& adm, const Opts_t& opts);
160  std::vector<int> factor_col(const adm_t& adm, const Opts_t& opts,
161  const std::function<void(int, bool, std::size_t)>& blockcol);
162 
163  void laswp(const std::vector<int>& piv, bool fwd);
164 
165  static
166  std::vector<int> partial_factor(BLRMPI_t& A11, BLRMPI_t& A12,
167  BLRMPI_t& A21, BLRMPI_t& A22,
168  const adm_t& adm, const Opts_t& opts);
169 
170  static
171  std::vector<int> partial_factor_col(BLRMPI_t& F11, BLRMPI_t& F12, BLRMPI_t& F21,
172  BLRMPI_t& F22, const adm_t& adm, const Opts_t& opts,
173  const std::function<void(int, bool, std::size_t)>& blockcol);
174 
175  void compress(const Opts_t& opts);
176 
177  static
178  BLRMPI_t from_ScaLAPACK(const DistM_t& A, const ProcessorGrid2D& g,
179  const Opts_t& opts);
180  static
181  BLRMPI_t from_ScaLAPACK(const DistM_t& A, const ProcessorGrid2D& g,
182  const vec_t& Rt, const vec_t& Ct);
183  DistM_t to_ScaLAPACK(const BLACSGrid* g) const;
184  void to_ScaLAPACK(DistM_t& A) const;
185 
186  void print(const std::string& name);
187 
188  std::size_t rowblocks() const { return brows_; }
189  std::size_t colblocks() const { return bcols_; }
190  std::size_t rowblockslocal() const { return lbrows_; }
191  std::size_t colblockslocal() const { return lbcols_; }
192  std::size_t tilerows(std::size_t i) const { return roff_[i+1] - roff_[i]; }
193  std::size_t tilecols(std::size_t j) const { return coff_[j+1] - coff_[j]; }
194  std::size_t tileroff(std::size_t i) const { assert(i <= rowblocks()); return roff_[i]; }
195  std::size_t tilecoff(std::size_t j) const { assert(j <= colblocks()); return coff_[j]; }
196 
197  int rg2p(std::size_t i) const;
198  int cg2p(std::size_t j) const;
199  std::size_t rl2g(std::size_t i) const;
200  std::size_t cl2g(std::size_t j) const;
201  std::size_t rg2t(std::size_t i) const;
202  std::size_t cg2t(std::size_t j) const;
203 
204  std::size_t lrows() const { return lrows_; }
205  std::size_t lcols() const { return lcols_; }
206 
211  scalar_t operator()(std::size_t i, std::size_t j) const;
212  scalar_t& operator()(std::size_t i, std::size_t j);
213 
214  scalar_t get_element_and_decompress_HODBF(int tr, int tc, int lr, int lc);
215  void decompress_local_columns(int c_min, int c_max);
216  void remove_tiles_before_local_column(int c_min, int c_max);
222  const scalar_t& global(std::size_t i, std::size_t j) const;
223  scalar_t& global(std::size_t i, std::size_t j);
224 
225  private:
226  std::size_t rows_ = 0, cols_ = 0, lrows_ = 0, lcols_ = 0;
227  std::size_t brows_ = 0, bcols_ = 0, lbrows_ = 0, lbcols_ = 0;
228  vec_t roff_, coff_;
229  vec_t rl2t_, cl2t_, rl2l_, cl2l_, rl2g_, cl2g_;
230  std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
231  const ProcessorGrid2D* grid_ = nullptr;
232 
233  std::size_t tilerg2l(std::size_t i) const {
234  assert(int(i % grid_->nprows()) == grid_->prow());
235  return i / grid_->nprows();
236  }
237  std::size_t tilecg2l(std::size_t j) const {
238  assert(int(j % grid_->npcols()) == grid_->pcol());
239  return j / grid_->npcols();
240  }
241 
242  BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) {
243  return ltile(tilerg2l(i), tilecg2l(j));
244  }
245  const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) const {
246  return ltile(tilerg2l(i), tilecg2l(j));
247  }
248  DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) {
249  return ltile_dense(tilerg2l(i), tilecg2l(j));
250  }
251  const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) const {
252  return ltile_dense(tilerg2l(i), tilecg2l(j));
253  }
254 
255  BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) {
256  assert(i < rowblockslocal() && j < colblockslocal());
257  return *blocks_[i+j*rowblockslocal()].get();
258  }
259  const BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) const {
260  assert(i < rowblockslocal() && j < colblockslocal());
261  return *blocks_[i+j*rowblockslocal()].get();
262  }
263 
264  DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) {
265  assert(i < rowblockslocal() && j < colblockslocal());
266  assert(dynamic_cast<DenseTile<scalar_t>*>
267  (blocks_[i+j*rowblockslocal()].get()));
268  return *static_cast<DenseTile<scalar_t>*>
269  (blocks_[i+j*rowblockslocal()].get());
270  }
271  const DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) const {
272  assert(i < rowblockslocal() && j < colblockslocal());
273  assert(dynamic_cast<const DenseTile<scalar_t>*>
274  (blocks_[i+j*rowblockslocal()].get()));
275  return *static_cast<const DenseTile<scalar_t>*>
276  (blocks_[i+j*rowblockslocal()].get());
277  }
278 
279  std::unique_ptr<BLRTile<scalar_t>>&
280  block(std::size_t i, std::size_t j) {
281  assert(i < rowblocks() && j < colblocks());
282  return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
283  }
284  const std::unique_ptr<BLRTile<scalar_t>>&
285  block(std::size_t i, std::size_t j) const {
286  assert(i < rowblocks() && j < colblocks());
287  return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
288  }
289 
290  std::unique_ptr<BLRTile<scalar_t>>&
291  lblock(std::size_t i, std::size_t j) {
292  assert(i < rowblockslocal() && j < colblockslocal());
293  return blocks_[i+j*rowblockslocal()];
294  }
295  const std::unique_ptr<BLRTile<scalar_t>>&
296  lblock(std::size_t i, std::size_t j) const {
297  assert(i < rowblockslocal() && j < colblockslocal());
298  return blocks_[i+j*rowblockslocal()];
299  }
300 
301  void compress_tile(std::size_t i, std::size_t j, const Opts_t& opts);
302 
303  DenseTile<scalar_t>
304  bcast_dense_tile_along_col(std::size_t i, std::size_t j) const;
305  DenseTile<scalar_t>
306  bcast_dense_tile_along_row(std::size_t i, std::size_t j) const;
307 
308  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
309  bcast_row_of_tiles_along_cols(std::size_t i,
310  std::size_t j0, std::size_t j1) const;
311  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
312  bcast_col_of_tiles_along_rows(std::size_t i0, std::size_t i1,
313  std::size_t j) const;
314 
315  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
316  gather_rows(std::size_t i0, std::size_t i1,
317  std::size_t j0, std::size_t j1) const;
318 
319  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
320  gather_cols(std::size_t i0, std::size_t i1,
321  std::size_t j0, std::size_t j1) const;
322 
323  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
324  gather_row(std::size_t i0, std::size_t k,
325  std::size_t j0, std::size_t j1) const;
326 
327  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
328  gather_col(std::size_t i0, std::size_t i1,
329  std::size_t j0, std::size_t k) const;
330 
331  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
332  gather_rows_A22(std::size_t i1, std::size_t j1) const;
333 
334  std::vector<std::unique_ptr<BLRTile<scalar_t>>>
335  gather_cols_A22(std::size_t i1, std::size_t j1) const;
336 
337  template<typename T> friend void
338  trsv(UpLo ul, Trans ta, Diag d, const BLRMatrixMPI<T>& a,
339  BLRMatrixMPI<T>& b);
340  template<typename T> friend void
341  gemv(Trans ta, T alpha, const BLRMatrixMPI<T>& a,
342  const BLRMatrixMPI<T>& x, T beta, BLRMatrixMPI<T>& y);
343  template<typename T> friend void
344  trsm(Side s, UpLo ul, Trans ta, Diag d, T alpha,
345  const BLRMatrixMPI<T>& a, BLRMatrixMPI<T>& b);
346  template<typename T> friend void
347  gemm(Trans ta, Trans tb, T alpha, const BLRMatrixMPI<T>& a,
348  const BLRMatrixMPI<T>& b, T beta, BLRMatrixMPI<T>& c);
349 
350  template<typename T,typename I> friend class strumpack::ExtendAdd;
351  template<typename T,typename I> friend class BLRExtendAdd;
352  };
353 
354  template<typename scalar_t> void
355  LUAR(std::size_t kmax, std::size_t lk,
356  std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
357  std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
358  DenseMatrix<scalar_t>& tij, const BLROptions<scalar_t>& opts,
359  std::size_t tmp);
360 
361  template<typename scalar_t> void
362  LUAR_A22(std::size_t kmax, std::size_t lj, std::size_t lk,
363  std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
364  std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
365  DenseMatrix<scalar_t>& tij, const BLROptions<scalar_t>& opts,
366  std::size_t tmp);
367 
368  template<typename scalar_t> void
369  trsv(UpLo ul, Trans ta, Diag d, const BLRMatrixMPI<scalar_t>& a,
370  BLRMatrixMPI<scalar_t>& b);
371  template<typename scalar_t> void
372  gemv(Trans ta, scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
373  const BLRMatrixMPI<scalar_t>& x, scalar_t beta,
374  BLRMatrixMPI<scalar_t>& y);
375 
376  template<typename scalar_t> void
377  trsm(Side s, UpLo ul, Trans ta, Diag d,
378  scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
379  BLRMatrixMPI<scalar_t>& b);
380  template<typename scalar_t> void
381  gemm(Trans ta, Trans tb, scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
382  const BLRMatrixMPI<scalar_t>& b, scalar_t beta,
383  BLRMatrixMPI<scalar_t>& c);
384 
385  } // end namespace BLR
386 } // end namespace strumpack
387 
388 #endif // BLR_MATRIX_MPI_HPP
Contains the BLRMatrix class.
Distributed memory block low rank matrix.
Definition: BLRMatrixMPI.hpp:124
scalar_t operator()(std::size_t i, std::size_t j) const
std::size_t rank() const override
std::size_t rows() const override
Definition: BLRMatrixMPI.hpp:139
const scalar_t & global(std::size_t i, std::size_t j) const
std::size_t cols() const override
Definition: BLRMatrixMPI.hpp:140
std::size_t memory() const override
std::size_t nonzeros() const override
Class containing several options for the BLR code and data-structures.
Definition: BLROptions.hpp:85
Representation of a 2D processor grid, similar to a BLACS grid, but not requiring ScaLAPACK.
Definition: BLRMatrixMPI.hpp:56
ProcessorGrid2D(const MPIComm &comm)
ProcessorGrid2D(const MPIComm &comm, int P)
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:289
Definition: DistributedMatrix.hpp:52
Definition: BLRMatrixMPI.hpp:42
Definition: BLRMatrixMPI.hpp:43
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:190
bool is_root() const
Definition: MPIWrapper.hpp:289
int rank() const
Definition: MPIWrapper.hpp:267
Class to represent a structured matrix. This is the abstract base class for several types of structur...
Definition: StructuredMatrix.hpp:209
Definition: StrumpackOptions.hpp:42
UpLo
Definition: DenseMatrix.hpp:83
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
Side
Definition: DenseMatrix.hpp:74
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
Diag
Definition: DenseMatrix.hpp:92