32 #ifndef BLR_MATRIX_MPI_HPP
33 #define BLR_MATRIX_MPI_HPP
35 #include "dense/DistributedMatrix.hpp"
37 #include "BLRTile.hpp"
42 template<
typename scalar_t,
typename integer_t>
class ExtendAdd;
74 const MPIComm& Comm()
const {
return comm_; }
75 int nprows()
const {
return nprows_; }
76 int npcols()
const {
return npcols_; }
77 int prow()
const {
return prow_; }
78 int pcol()
const {
return pcol_; }
79 int rank()
const {
return Comm().
rank(); }
80 int npactives()
const {
return nprows()*npcols(); }
81 bool active()
const {
return active_; }
83 const MPIComm& row_comm()
const {
return rowcomm_; }
84 const MPIComm& col_comm()
const {
return colcomm_; }
86 bool is_local_row(
int i)
const {
return i % nprows_ == prow_; }
87 bool is_local_col(
int i)
const {
return i % npcols_ == pcol_; }
88 bool is_local(
int i,
int j)
const
89 {
return is_local_row(i) & is_local_col(j); }
91 int rg2p(
int i)
const {
return i % nprows(); }
92 int cg2p(
int j)
const {
return j % npcols(); }
93 int g2p(
int i,
int j)
const {
return rg2p(i) + cg2p(j) * nprows(); }
97 std::cout <<
"# ProcessorGrid2D: "
98 <<
"[" << nprows() <<
" x " << npcols() <<
"]"
103 bool active_ =
false;
104 int prow_ = -1, pcol_ = -1;
105 int nprows_ = 0, npcols_ = 0;
106 MPIComm comm_, rowcomm_, colcomm_;
130 using vec_t = std::vector<std::size_t>;
137 const vec_t& Rt,
const vec_t& Ct);
139 std::size_t
rows()
const override {
return rows_; }
140 std::size_t
cols()
const override {
return cols_; }
144 std::size_t
rank()
const override;
145 std::size_t total_memory()
const;
146 std::size_t total_nonzeros()
const;
147 std::size_t max_rank()
const;
149 const MPIComm& Comm()
const {
return grid_->Comm(); }
153 bool active()
const {
return grid_->active(); }
155 void fill(scalar_t v);
156 void fill_col(scalar_t v, std::size_t k, std::size_t CP);
158 std::vector<int>
factor(
const Opts_t& opts);
159 std::vector<int>
factor(
const adm_t& adm,
const Opts_t& opts);
160 std::vector<int> factor_col(
const adm_t& adm,
const Opts_t& opts,
161 const std::function<
void(
int,
bool, std::size_t)>& blockcol);
163 void laswp(
const std::vector<int>& piv,
bool fwd);
166 std::vector<int> partial_factor(BLRMPI_t& A11, BLRMPI_t& A12,
167 BLRMPI_t& A21, BLRMPI_t& A22,
168 const adm_t& adm,
const Opts_t& opts);
171 std::vector<int> partial_factor_col(BLRMPI_t& F11, BLRMPI_t& F12, BLRMPI_t& F21,
172 BLRMPI_t& F22,
const adm_t& adm,
const Opts_t& opts,
173 const std::function<
void(
int,
bool, std::size_t)>& blockcol);
175 void compress(
const Opts_t& opts);
178 BLRMPI_t from_ScaLAPACK(
const DistM_t& A,
const ProcessorGrid2D& g,
181 BLRMPI_t from_ScaLAPACK(
const DistM_t& A,
const ProcessorGrid2D& g,
182 const vec_t& Rt,
const vec_t& Ct);
183 DistM_t to_ScaLAPACK(
const BLACSGrid* g)
const;
184 void to_ScaLAPACK(DistM_t& A)
const;
186 void print(
const std::string& name);
188 std::size_t rowblocks()
const {
return brows_; }
189 std::size_t colblocks()
const {
return bcols_; }
190 std::size_t rowblockslocal()
const {
return lbrows_; }
191 std::size_t colblockslocal()
const {
return lbcols_; }
192 std::size_t tilerows(std::size_t i)
const {
return roff_[i+1] - roff_[i]; }
193 std::size_t tilecols(std::size_t j)
const {
return coff_[j+1] - coff_[j]; }
194 std::size_t tileroff(std::size_t i)
const { assert(i <= rowblocks());
return roff_[i]; }
195 std::size_t tilecoff(std::size_t j)
const { assert(j <= colblocks());
return coff_[j]; }
197 int rg2p(std::size_t i)
const;
198 int cg2p(std::size_t j)
const;
199 std::size_t rl2g(std::size_t i)
const;
200 std::size_t cl2g(std::size_t j)
const;
201 std::size_t rg2t(std::size_t i)
const;
202 std::size_t cg2t(std::size_t j)
const;
204 std::size_t lrows()
const {
return lrows_; }
205 std::size_t lcols()
const {
return lcols_; }
212 scalar_t&
operator()(std::size_t i, std::size_t j);
214 scalar_t get_element_and_decompress_HODBF(
int tr,
int tc,
int lr,
int lc);
215 void decompress_local_columns(
int c_min,
int c_max);
216 void remove_tiles_before_local_column(
int c_min,
int c_max);
222 const scalar_t&
global(std::size_t i, std::size_t j)
const;
223 scalar_t&
global(std::size_t i, std::size_t j);
226 std::size_t rows_ = 0, cols_ = 0, lrows_ = 0, lcols_ = 0;
227 std::size_t brows_ = 0, bcols_ = 0, lbrows_ = 0, lbcols_ = 0;
229 vec_t rl2t_, cl2t_, rl2l_, cl2l_, rl2g_, cl2g_;
230 std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
233 std::size_t tilerg2l(std::size_t i)
const {
234 assert(
int(i % grid_->nprows()) == grid_->prow());
235 return i / grid_->nprows();
237 std::size_t tilecg2l(std::size_t j)
const {
238 assert(
int(j % grid_->npcols()) == grid_->pcol());
239 return j / grid_->npcols();
242 BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) {
243 return ltile(tilerg2l(i), tilecg2l(j));
245 const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j)
const {
246 return ltile(tilerg2l(i), tilecg2l(j));
248 DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) {
249 return ltile_dense(tilerg2l(i), tilecg2l(j));
251 const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j)
const {
252 return ltile_dense(tilerg2l(i), tilecg2l(j));
255 BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) {
256 assert(i < rowblockslocal() && j < colblockslocal());
257 return *blocks_[i+j*rowblockslocal()].get();
259 const BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j)
const {
260 assert(i < rowblockslocal() && j < colblockslocal());
261 return *blocks_[i+j*rowblockslocal()].get();
264 DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) {
265 assert(i < rowblockslocal() && j < colblockslocal());
266 assert(
dynamic_cast<DenseTile<scalar_t>*
>
267 (blocks_[i+j*rowblockslocal()].get()));
268 return *
static_cast<DenseTile<scalar_t>*
>
269 (blocks_[i+j*rowblockslocal()].get());
271 const DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j)
const {
272 assert(i < rowblockslocal() && j < colblockslocal());
273 assert(
dynamic_cast<const DenseTile<scalar_t>*
>
274 (blocks_[i+j*rowblockslocal()].get()));
275 return *
static_cast<const DenseTile<scalar_t>*
>
276 (blocks_[i+j*rowblockslocal()].get());
279 std::unique_ptr<BLRTile<scalar_t>>&
280 block(std::size_t i, std::size_t j) {
281 assert(i < rowblocks() && j < colblocks());
282 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
284 const std::unique_ptr<BLRTile<scalar_t>>&
285 block(std::size_t i, std::size_t j)
const {
286 assert(i < rowblocks() && j < colblocks());
287 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
290 std::unique_ptr<BLRTile<scalar_t>>&
291 lblock(std::size_t i, std::size_t j) {
292 assert(i < rowblockslocal() && j < colblockslocal());
293 return blocks_[i+j*rowblockslocal()];
295 const std::unique_ptr<BLRTile<scalar_t>>&
296 lblock(std::size_t i, std::size_t j)
const {
297 assert(i < rowblockslocal() && j < colblockslocal());
298 return blocks_[i+j*rowblockslocal()];
301 void compress_tile(std::size_t i, std::size_t j,
const Opts_t& opts);
304 bcast_dense_tile_along_col(std::size_t i, std::size_t j)
const;
306 bcast_dense_tile_along_row(std::size_t i, std::size_t j)
const;
308 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
309 bcast_row_of_tiles_along_cols(std::size_t i,
310 std::size_t j0, std::size_t j1)
const;
311 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
312 bcast_col_of_tiles_along_rows(std::size_t i0, std::size_t i1,
313 std::size_t j)
const;
315 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
316 gather_rows(std::size_t i0, std::size_t i1,
317 std::size_t j0, std::size_t j1)
const;
319 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
320 gather_cols(std::size_t i0, std::size_t i1,
321 std::size_t j0, std::size_t j1)
const;
323 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
324 gather_row(std::size_t i0, std::size_t k,
325 std::size_t j0, std::size_t j1)
const;
327 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
328 gather_col(std::size_t i0, std::size_t i1,
329 std::size_t j0, std::size_t k)
const;
331 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
332 gather_rows_A22(std::size_t i1, std::size_t j1)
const;
334 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
335 gather_cols_A22(std::size_t i1, std::size_t j1)
const;
337 template<
typename T>
friend void
340 template<
typename T>
friend void
341 gemv(
Trans ta, T alpha,
const BLRMatrixMPI<T>& a,
342 const BLRMatrixMPI<T>& x, T beta, BLRMatrixMPI<T>& y);
343 template<
typename T>
friend void
345 const BLRMatrixMPI<T>& a, BLRMatrixMPI<T>& b);
346 template<
typename T>
friend void
347 gemm(
Trans ta,
Trans tb, T alpha,
const BLRMatrixMPI<T>& a,
348 const BLRMatrixMPI<T>& b, T beta, BLRMatrixMPI<T>& c);
351 template<
typename T,
typename I>
friend class BLRExtendAdd;
354 template<
typename scalar_t>
void
355 LUAR(std::size_t kmax, std::size_t lk,
356 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
357 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
358 DenseMatrix<scalar_t>& tij,
const BLROptions<scalar_t>& opts,
361 template<
typename scalar_t>
void
362 LUAR_A22(std::size_t kmax, std::size_t lj, std::size_t lk,
363 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
364 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
365 DenseMatrix<scalar_t>& tij,
const BLROptions<scalar_t>& opts,
368 template<
typename scalar_t>
void
370 BLRMatrixMPI<scalar_t>& b);
371 template<
typename scalar_t>
void
372 gemv(
Trans ta, scalar_t alpha,
const BLRMatrixMPI<scalar_t>& a,
373 const BLRMatrixMPI<scalar_t>& x, scalar_t beta,
374 BLRMatrixMPI<scalar_t>& y);
376 template<
typename scalar_t>
void
378 scalar_t alpha,
const BLRMatrixMPI<scalar_t>& a,
379 BLRMatrixMPI<scalar_t>& b);
380 template<
typename scalar_t>
void
381 gemm(
Trans ta,
Trans tb, scalar_t alpha,
const BLRMatrixMPI<scalar_t>& a,
382 const BLRMatrixMPI<scalar_t>& b, scalar_t beta,
383 BLRMatrixMPI<scalar_t>& c);
Contains the BLRMatrix class.
Distributed memory block low rank matrix.
Definition: BLRMatrixMPI.hpp:124
scalar_t operator()(std::size_t i, std::size_t j) const
std::size_t rank() const override
std::size_t rows() const override
Definition: BLRMatrixMPI.hpp:139
const scalar_t & global(std::size_t i, std::size_t j) const
std::size_t cols() const override
Definition: BLRMatrixMPI.hpp:140
std::size_t memory() const override
std::size_t nonzeros() const override
Class containing several options for the BLR code and data-structures.
Definition: BLROptions.hpp:85
Representation of a 2D processor grid, similar to a BLACS grid, but not requiring ScaLAPACK.
Definition: BLRMatrixMPI.hpp:56
ProcessorGrid2D(const MPIComm &comm)
ProcessorGrid2D(const MPIComm &comm, int P)
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:289
Definition: DistributedMatrix.hpp:52
Definition: BLRMatrixMPI.hpp:42
Definition: BLRMatrixMPI.hpp:43
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:190
bool is_root() const
Definition: MPIWrapper.hpp:289
int rank() const
Definition: MPIWrapper.hpp:267
Class to represent a structured matrix. This is the abstract base class for several types of structur...
Definition: StructuredMatrix.hpp:209
Definition: StrumpackOptions.hpp:42
UpLo
Definition: DenseMatrix.hpp:83
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
Side
Definition: DenseMatrix.hpp:74
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
Diag
Definition: DenseMatrix.hpp:92