BLRMatrixMPI.hpp
Go to the documentation of this file.
1/*
2 * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3 * Regents of the University of California, through Lawrence Berkeley
4 * National Laboratory (subject to receipt of any required approvals
5 * from the U.S. Dept. of Energy). All rights reserved.
6 *
7 * If you have questions about your rights to use or distribute this
8 * software, please contact Berkeley Lab's Technology Transfer
9 * Department at TTD@lbl.gov.
10 *
11 * NOTICE. This software is owned by the U.S. Department of Energy. As
12 * such, the U.S. Government has been granted for itself and others
13 * acting on its behalf a paid-up, nonexclusive, irrevocable,
14 * worldwide license in the Software to reproduce, prepare derivative
15 * works, and perform publicly and display publicly. Beginning five
16 * (5) years after the date permission to assert copyright is obtained
17 * from the U.S. Department of Energy, and subject to any subsequent
18 * five (5) year renewals, the U.S. Government is granted for itself
19 * and others acting on its behalf a paid-up, nonexclusive,
20 * irrevocable, worldwide license in the Software to reproduce,
21 * prepare derivative works, distribute copies to the public, perform
22 * publicly and display publicly, and to permit others to do so.
23 *
24 * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25 * (Lawrence Berkeley National Lab, Computational Research
26 * Division).
27 *
28 */
32#ifndef BLR_MATRIX_MPI_HPP
33#define BLR_MATRIX_MPI_HPP
34
36#include "BLRMatrix.hpp"
37#include "BLRTile.hpp"
38
39namespace strumpack {
40
41 // forward declaration
42 template<typename scalar_t,typename integer_t> class ExtendAdd;
43 template<typename scalar_t,typename integer_t> class FrontalMatrixBLRMPI;
44
45 namespace BLR {
46
47 // forward declaration
48 //template<typename scalar_t,typename integer_t> class BLRExtendAdd;
49
57 public:
72 ProcessorGrid2D(const MPIComm& comm, int P);
73
74 const MPIComm& Comm() const { return comm_; }
75 int nprows() const { return nprows_; }
76 int npcols() const { return npcols_; }
77 int prow() const { return prow_; }
78 int pcol() const { return pcol_; }
79 int rank() const { return Comm().rank(); }
80 int npactives() const { return nprows()*npcols(); }
81 bool active() const { return active_; }
82
83 const MPIComm& row_comm() const { return rowcomm_; }
84 const MPIComm& col_comm() const { return colcomm_; }
85
86 bool is_local_row(int i) const { return i % nprows_ == prow_; }
87 bool is_local_col(int i) const { return i % npcols_ == pcol_; }
88 bool is_local(int i, int j) const
89 { return is_local_row(i) && is_local_col(j); }
90
91 int rg2p(int i) const { return i % nprows(); }
92 int cg2p(int j) const { return j % npcols(); }
93 int g2p(int i, int j) const { return rg2p(i) + cg2p(j) * nprows(); }
94
95 void print() const {
96 if (comm_.is_root())
97 std::cout << "# ProcessorGrid2D: "
98 << "[" << nprows() << " x " << npcols() << "]"
99 << std::endl;
100 }
101
102 private:
103 bool active_ = false;
104 int prow_ = -1, pcol_ = -1;
105 int nprows_ = 0, npcols_ = 0;
106 MPIComm comm_, rowcomm_, colcomm_;
107 };
108
109
123 template<typename scalar_t> class BLRMatrixMPI
124 : public structured::StructuredMatrix<scalar_t> {
130 using vec_t = std::vector<std::size_t>;
131 using adm_t = DenseMatrix<bool>;
133
134 public:
135 BLRMatrixMPI();
136 BLRMatrixMPI(const ProcessorGrid2D& grid,
137 const vec_t& Rt, const vec_t& Ct);
138
139 std::size_t rows() const override { return rows_; }
140 std::size_t cols() const override { return cols_; }
141
142 std::size_t memory() const override;
143 std::size_t nonzeros() const override;
144 std::size_t rank() const override;
145 std::size_t total_memory() const;
146 std::size_t total_nonzeros() const;
147 std::size_t max_rank() const;
148
149 const MPIComm& Comm() const { return grid_->Comm(); }
150
151 const ProcessorGrid2D* grid() const { return grid_; }
152
153 bool active() const { return grid_->active(); }
154
155 void fill(scalar_t v);
156 void fill_col(scalar_t v, std::size_t k, std::size_t CP);
157
158 std::vector<int> factor(const Opts_t& opts);
159 std::vector<int> factor(const adm_t& adm, const Opts_t& opts);
160 std::vector<int> factor_col(const adm_t& adm, const Opts_t& opts,
161 const std::function<void(int, bool, std::size_t)>& blockcol);
162
163 void laswp(const std::vector<int>& piv, bool fwd);
164
165 static
166 std::vector<int> partial_factor(BLRMPI_t& A11, BLRMPI_t& A12,
167 BLRMPI_t& A21, BLRMPI_t& A22,
168 const adm_t& adm, const Opts_t& opts);
169
170 static
171 std::vector<int> partial_factor_col(BLRMPI_t& F11, BLRMPI_t& F12, BLRMPI_t& F21,
172 BLRMPI_t& F22, const adm_t& adm, const Opts_t& opts,
173 const std::function<void(int, bool, std::size_t)>& blockcol);
174
175 void compress(const Opts_t& opts);
176
177 static
178 BLRMPI_t from_ScaLAPACK(const DistM_t& A, const ProcessorGrid2D& g,
179 const Opts_t& opts);
180 static
181 BLRMPI_t from_ScaLAPACK(const DistM_t& A, const ProcessorGrid2D& g,
182 const vec_t& Rt, const vec_t& Ct);
183 DistM_t to_ScaLAPACK(const BLACSGrid* g) const;
184 void to_ScaLAPACK(DistM_t& A) const;
185
186 void print(const std::string& name);
187
188 std::size_t rowblocks() const { return brows_; }
189 std::size_t colblocks() const { return bcols_; }
190 std::size_t rowblockslocal() const { return lbrows_; }
191 std::size_t colblockslocal() const { return lbcols_; }
192 std::size_t tilerows(std::size_t i) const { return roff_[i+1] - roff_[i]; }
193 std::size_t tilecols(std::size_t j) const { return coff_[j+1] - coff_[j]; }
194 std::size_t tileroff(std::size_t i) const { assert(i <= rowblocks()); return roff_[i]; }
195 std::size_t tilecoff(std::size_t j) const { assert(j <= colblocks()); return coff_[j]; }
196
197 int rg2p(std::size_t i) const;
198 int cg2p(std::size_t j) const;
199 std::size_t rl2g(std::size_t i) const;
200 std::size_t cl2g(std::size_t j) const;
201 std::size_t rg2t(std::size_t i) const;
202 std::size_t cg2t(std::size_t j) const;
203
204 std::size_t lrows() const { return lrows_; }
205 std::size_t lcols() const { return lcols_; }
206
211 scalar_t operator()(std::size_t i, std::size_t j) const;
212 scalar_t& operator()(std::size_t i, std::size_t j);
213
214 scalar_t get_element_and_decompress_HODBF(int tr, int tc, int lr, int lc);
215 void decompress_local_columns(int c_min, int c_max);
216 void remove_tiles_before_local_column(int c_min, int c_max);
222 const scalar_t& global(std::size_t i, std::size_t j) const;
223 scalar_t& global(std::size_t i, std::size_t j);
224
225 private:
226 std::size_t rows_ = 0, cols_ = 0, lrows_ = 0, lcols_ = 0;
227 std::size_t brows_ = 0, bcols_ = 0, lbrows_ = 0, lbcols_ = 0;
228 vec_t roff_, coff_;
229 vec_t rl2t_, cl2t_, rl2l_, cl2l_, rl2g_, cl2g_;
230 std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
231 const ProcessorGrid2D* grid_ = nullptr;
232
233 std::size_t tilerg2l(std::size_t i) const {
234 assert(int(i % grid_->nprows()) == grid_->prow());
235 return i / grid_->nprows();
236 }
237 std::size_t tilecg2l(std::size_t j) const {
238 assert(int(j % grid_->npcols()) == grid_->pcol());
239 return j / grid_->npcols();
240 }
241
242 BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) {
243 return ltile(tilerg2l(i), tilecg2l(j));
244 }
245 const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) const {
246 return ltile(tilerg2l(i), tilecg2l(j));
247 }
248 DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) {
249 return ltile_dense(tilerg2l(i), tilecg2l(j));
250 }
251 const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) const {
252 return ltile_dense(tilerg2l(i), tilecg2l(j));
253 }
254
255 BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) {
256 assert(i < rowblockslocal() && j < colblockslocal());
257 return *blocks_[i+j*rowblockslocal()].get();
258 }
259 const BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) const {
260 assert(i < rowblockslocal() && j < colblockslocal());
261 return *blocks_[i+j*rowblockslocal()].get();
262 }
263
264 DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) {
265 assert(i < rowblockslocal() && j < colblockslocal());
266 assert(dynamic_cast<DenseTile<scalar_t>*>
267 (blocks_[i+j*rowblockslocal()].get()));
268 return *static_cast<DenseTile<scalar_t>*>
269 (blocks_[i+j*rowblockslocal()].get());
270 }
271 const DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) const {
272 assert(i < rowblockslocal() && j < colblockslocal());
273 assert(dynamic_cast<const DenseTile<scalar_t>*>
274 (blocks_[i+j*rowblockslocal()].get()));
275 return *static_cast<const DenseTile<scalar_t>*>
276 (blocks_[i+j*rowblockslocal()].get());
277 }
278
279 std::unique_ptr<BLRTile<scalar_t>>&
280 block(std::size_t i, std::size_t j) {
281 assert(i < rowblocks() && j < colblocks());
282 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
283 }
284 const std::unique_ptr<BLRTile<scalar_t>>&
285 block(std::size_t i, std::size_t j) const {
286 assert(i < rowblocks() && j < colblocks());
287 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
288 }
289
290 std::unique_ptr<BLRTile<scalar_t>>&
291 lblock(std::size_t i, std::size_t j) {
292 assert(i < rowblockslocal() && j < colblockslocal());
293 return blocks_[i+j*rowblockslocal()];
294 }
295 const std::unique_ptr<BLRTile<scalar_t>>&
296 lblock(std::size_t i, std::size_t j) const {
297 assert(i < rowblockslocal() && j < colblockslocal());
298 return blocks_[i+j*rowblockslocal()];
299 }
300
301 void compress_tile(std::size_t i, std::size_t j, const Opts_t& opts);
302
303 DenseTile<scalar_t>
304 bcast_dense_tile_along_col(std::size_t i, std::size_t j) const;
305 DenseTile<scalar_t>
306 bcast_dense_tile_along_row(std::size_t i, std::size_t j) const;
307
308 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
309 bcast_row_of_tiles_along_cols(std::size_t i,
310 std::size_t j0, std::size_t j1) const;
311 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
312 bcast_col_of_tiles_along_rows(std::size_t i0, std::size_t i1,
313 std::size_t j) const;
314
315 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
316 gather_rows(std::size_t i0, std::size_t i1,
317 std::size_t j0, std::size_t j1) const;
318
319 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
320 gather_cols(std::size_t i0, std::size_t i1,
321 std::size_t j0, std::size_t j1) const;
322
323 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
324 gather_row(std::size_t i0, std::size_t k,
325 std::size_t j0, std::size_t j1) const;
326
327 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
328 gather_col(std::size_t i0, std::size_t i1,
329 std::size_t j0, std::size_t k) const;
330
331 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
332 gather_rows_A22(std::size_t i1, std::size_t j1) const;
333
334 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
335 gather_cols_A22(std::size_t i1, std::size_t j1) const;
336
337 template<typename T> friend void
338 trsv(UpLo ul, Trans ta, Diag d, const BLRMatrixMPI<T>& a,
339 BLRMatrixMPI<T>& b);
340 template<typename T> friend void
341 gemv(Trans ta, T alpha, const BLRMatrixMPI<T>& a,
342 const BLRMatrixMPI<T>& x, T beta, BLRMatrixMPI<T>& y);
343 template<typename T> friend void
344 trsm(Side s, UpLo ul, Trans ta, Diag d, T alpha,
345 const BLRMatrixMPI<T>& a, BLRMatrixMPI<T>& b);
346 template<typename T> friend void
347 gemm(Trans ta, Trans tb, T alpha, const BLRMatrixMPI<T>& a,
348 const BLRMatrixMPI<T>& b, T beta, BLRMatrixMPI<T>& c);
349
350 template<typename T,typename I> friend class strumpack::ExtendAdd;
351 template<typename T,typename I> friend class BLRExtendAdd;
352 };
353
354 template<typename scalar_t> void
355 LUAR(std::size_t kmax, std::size_t lk,
356 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
357 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
358 DenseMatrix<scalar_t>& tij, const BLROptions<scalar_t>& opts,
359 std::size_t tmp);
360
361 template<typename scalar_t> void
362 LUAR_A22(std::size_t kmax, std::size_t lj, std::size_t lk,
363 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
364 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
365 DenseMatrix<scalar_t>& tij, const BLROptions<scalar_t>& opts,
366 std::size_t tmp);
367
368 template<typename scalar_t> void
369 trsv(UpLo ul, Trans ta, Diag d, const BLRMatrixMPI<scalar_t>& a,
370 BLRMatrixMPI<scalar_t>& b);
371 template<typename scalar_t> void
372 gemv(Trans ta, scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
373 const BLRMatrixMPI<scalar_t>& x, scalar_t beta,
374 BLRMatrixMPI<scalar_t>& y);
375
376 template<typename scalar_t> void
377 trsm(Side s, UpLo ul, Trans ta, Diag d,
378 scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
379 BLRMatrixMPI<scalar_t>& b);
380 template<typename scalar_t> void
381 gemm(Trans ta, Trans tb, scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
382 const BLRMatrixMPI<scalar_t>& b, scalar_t beta,
383 BLRMatrixMPI<scalar_t>& c);
384
385 } // end namespace BLR
386} // end namespace strumpack
387
388#endif // BLR_MATRIX_MPI_HPP
Contains the BLRMatrix class.
Contains the DistributedMatrix and DistributedMatrixWrapper classes, wrappers around ScaLAPACK/PBLAS ...
Distributed memory block low rank matrix.
Definition: BLRMatrixMPI.hpp:124
scalar_t operator()(std::size_t i, std::size_t j) const
std::size_t rank() const override
std::size_t rows() const override
Definition: BLRMatrixMPI.hpp:139
std::size_t cols() const override
Definition: BLRMatrixMPI.hpp:140
std::size_t memory() const override
std::size_t nonzeros() const override
const scalar_t & global(std::size_t i, std::size_t j) const
Class containing several options for the BLR code and data-structures.
Definition: BLROptions.hpp:82
Representation of a 2D processor grid, similar to a BLACS grid, but not requiring ScaLAPACK.
Definition: BLRMatrixMPI.hpp:56
ProcessorGrid2D(const MPIComm &comm)
ProcessorGrid2D(const MPIComm &comm, int P)
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Definition: DistributedMatrix.hpp:733
2D block cyclicly distributed matrix, as used by ScaLAPACK.
Definition: DistributedMatrix.hpp:84
Definition: BLRMatrixMPI.hpp:42
Definition: BLRMatrixMPI.hpp:43
Wrapper class around an MPI_Comm object.
Definition: MPIWrapper.hpp:194
bool is_root() const
Definition: MPIWrapper.hpp:293
int rank() const
Definition: MPIWrapper.hpp:271
Class to represent a structured matrix. This is the abstract base class for several types of structur...
Definition: StructuredMatrix.hpp:209
Definition: StrumpackOptions.hpp:43
UpLo
Definition: DenseMatrix.hpp:83
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
Side
Definition: DenseMatrix.hpp:74
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
Diag
Definition: DenseMatrix.hpp:92