Loading...
Searching...
No Matches
BLRMatrixMPI.hpp
Go to the documentation of this file.
1/*
2 * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3 * Regents of the University of California, through Lawrence Berkeley
4 * National Laboratory (subject to receipt of any required approvals
5 * from the U.S. Dept. of Energy). All rights reserved.
6 *
7 * If you have questions about your rights to use or distribute this
8 * software, please contact Berkeley Lab's Technology Transfer
9 * Department at TTD@lbl.gov.
10 *
11 * NOTICE. This software is owned by the U.S. Department of Energy. As
12 * such, the U.S. Government has been granted for itself and others
13 * acting on its behalf a paid-up, nonexclusive, irrevocable,
14 * worldwide license in the Software to reproduce, prepare derivative
15 * works, and perform publicly and display publicly. Beginning five
16 * (5) years after the date permission to assert copyright is obtained
17 * from the U.S. Department of Energy, and subject to any subsequent
18 * five (5) year renewals, the U.S. Government is granted for itself
19 * and others acting on its behalf a paid-up, nonexclusive,
20 * irrevocable, worldwide license in the Software to reproduce,
21 * prepare derivative works, distribute copies to the public, perform
22 * publicly and display publicly, and to permit others to do so.
23 *
24 * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25 * (Lawrence Berkeley National Lab, Computational Research
26 * Division).
27 *
28 */
32#ifndef BLR_MATRIX_MPI_HPP
33#define BLR_MATRIX_MPI_HPP
34
36#include "BLRMatrix.hpp"
37#include "BLRTile.hpp"
38
39namespace strumpack {
40
41 // forward declaration
42 template<typename scalar_t,typename integer_t> class ExtendAdd;
43 template<typename scalar_t,typename integer_t> class FrontBLRMPI;
44
45 namespace BLR {
46
54 public:
69 ProcessorGrid2D(const MPIComm& comm, int P);
70
71 const MPIComm& Comm() const { return comm_; }
72 int nprows() const { return nprows_; }
73 int npcols() const { return npcols_; }
74 int prow() const { return prow_; }
75 int pcol() const { return pcol_; }
76 int rank() const { return Comm().rank(); }
77 int npactives() const { return nprows()*npcols(); }
78 bool active() const { return active_; }
79
80 const MPIComm& row_comm() const { return rowcomm_; }
81 const MPIComm& col_comm() const { return colcomm_; }
82
83 bool is_local_row(int i) const { return i % nprows_ == prow_; }
84 bool is_local_col(int i) const { return i % npcols_ == pcol_; }
85 bool is_local(int i, int j) const
86 { return is_local_row(i) && is_local_col(j); }
87
88 int rg2p(int i) const { return i % nprows(); }
89 int cg2p(int j) const { return j % npcols(); }
90 int g2p(int i, int j) const { return rg2p(i) + cg2p(j) * nprows(); }
91
92 void print() const {
93 if (comm_.is_root())
94 std::cout << "# ProcessorGrid2D: "
95 << "[" << nprows() << " x " << npcols() << "]"
96 << std::endl;
97 }
98
99 private:
100 bool active_ = false;
101 int prow_ = -1, pcol_ = -1;
102 int nprows_ = 0, npcols_ = 0;
103 MPIComm comm_, rowcomm_, colcomm_;
104 };
105
106
120 template<typename scalar_t> class BLRMatrixMPI
121 : public structured::StructuredMatrix<scalar_t> {
122 using real_t = typename RealType<scalar_t>::value_type;
128 using vec_t = std::vector<std::size_t>;
129 using adm_t = DenseMatrix<bool>;
131
132 public:
133 BLRMatrixMPI();
134 BLRMatrixMPI(const ProcessorGrid2D& grid,
135 const vec_t& Rt, const vec_t& Ct);
136
137 std::size_t rows() const override { return rows_; }
138 std::size_t cols() const override { return cols_; }
139
140 std::size_t memory() const override;
141 std::size_t nonzeros() const override;
142 std::size_t rank() const override;
143 std::size_t total_memory() const;
144 std::size_t total_nonzeros() const;
145 std::size_t max_rank() const;
146
147 real_t normF() const;
148
149 const MPIComm& Comm() const { return grid_->Comm(); }
150
151 const ProcessorGrid2D* grid() const { return grid_; }
152
153 bool active() const { return grid_->active(); }
154
155 void fill(scalar_t v);
156 void fill_col(scalar_t v, std::size_t k, std::size_t CP);
157
158 // TODO store piv in BLRMatrixMPI class
159 std::vector<int> factor(const Opts_t& opts);
160 std::vector<int> factor(const adm_t& adm, const Opts_t& opts);
161 std::vector<int> factor_col(const adm_t& adm, const Opts_t& opts,
162 const std::function
163 <void(int, bool, std::size_t)>& blockcol);
164
165 void laswp(const std::vector<int>& piv, bool fwd);
166
167#if defined(STRUMPACK_USE_GPU)
168 static std::vector<int>
169 partial_factor_gpu(BLRMPI_t& A11, BLRMPI_t& A12,
170 BLRMPI_t& A21, BLRMPI_t& A22,
171 const adm_t& adm, const Opts_t& opts);
172#endif
173
174 static std::vector<int>
175 partial_factor(BLRMPI_t& A11, BLRMPI_t& A12,
176 BLRMPI_t& A21, BLRMPI_t& A22,
177 const adm_t& adm, const Opts_t& opts);
178
179 static std::vector<int>
180 partial_factor_col(BLRMPI_t& F11, BLRMPI_t& F12, BLRMPI_t& F21,
181 BLRMPI_t& F22, const adm_t& adm, const Opts_t& opts,
182 const std::function<void(int, bool, std::size_t)>& blockcol);
183
184 void compress(const Opts_t& opts);
185
186 static
187 BLRMPI_t from_ScaLAPACK(const DistM_t& A, const ProcessorGrid2D& g,
188 const Opts_t& opts);
189 static
190 BLRMPI_t from_ScaLAPACK(const DistM_t& A, const ProcessorGrid2D& g,
191 const vec_t& Rt, const vec_t& Ct);
192 DistM_t to_ScaLAPACK(const BLACSGrid* g) const;
193 void to_ScaLAPACK(DistM_t& A) const;
194
195 void print(const std::string& name);
196
197 std::size_t rowblocks() const { return brows_; }
198 std::size_t colblocks() const { return bcols_; }
199 std::size_t rowblockslocal() const { return lbrows_; }
200 std::size_t colblockslocal() const { return lbcols_; }
201 std::size_t tilerows(std::size_t i) const { return roff_[i+1] - roff_[i]; }
202 std::size_t tilecols(std::size_t j) const { return coff_[j+1] - coff_[j]; }
203 std::size_t tileroff(std::size_t i) const { assert(i <= rowblocks()); return roff_[i]; }
204 std::size_t tilecoff(std::size_t j) const { assert(j <= colblocks()); return coff_[j]; }
205 std::size_t maxtilerows() const;
206 std::size_t maxtilecols() const;
207
208 int rg2p(std::size_t i) const;
209 int cg2p(std::size_t j) const;
210 std::size_t rl2g(std::size_t i) const;
211 std::size_t cl2g(std::size_t j) const;
212 std::size_t rg2t(std::size_t i) const;
213 std::size_t cg2t(std::size_t j) const;
214
215 std::size_t lrows() const { return lrows_; }
216 std::size_t lcols() const { return lcols_; }
217
222 scalar_t operator()(std::size_t i, std::size_t j) const;
223 scalar_t& operator()(std::size_t i, std::size_t j);
224
225 scalar_t get_element_and_decompress_HODBF(int tr, int tc, int lr, int lc);
226 void decompress_local_columns(int c_min, int c_max);
227 void remove_tiles_before_local_column(int c_min, int c_max);
233 const scalar_t& global(std::size_t i, std::size_t j) const;
234 scalar_t& global(std::size_t i, std::size_t j);
235
236 private:
237 std::size_t rows_ = 0, cols_ = 0, lrows_ = 0, lcols_ = 0;
238 std::size_t brows_ = 0, bcols_ = 0, lbrows_ = 0, lbcols_ = 0;
239 vec_t roff_, coff_;
240 vec_t rl2t_, cl2t_, rl2l_, cl2l_, rl2g_, cl2g_;
241 std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
242 const ProcessorGrid2D* grid_ = nullptr;
243
244 std::size_t tilerg2l(std::size_t i) const {
245 assert(int(i % grid_->nprows()) == grid_->prow());
246 return i / grid_->nprows();
247 }
248 std::size_t tilecg2l(std::size_t j) const {
249 assert(int(j % grid_->npcols()) == grid_->pcol());
250 return j / grid_->npcols();
251 }
252
253 BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) {
254 return ltile(tilerg2l(i), tilecg2l(j));
255 }
256 const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) const {
257 return ltile(tilerg2l(i), tilecg2l(j));
258 }
259 DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) {
260 return ltile_dense(tilerg2l(i), tilecg2l(j));
261 }
262 const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) const {
263 return ltile_dense(tilerg2l(i), tilecg2l(j));
264 }
265
266 BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) {
267 assert(i < rowblockslocal() && j < colblockslocal());
268 return *blocks_[i+j*rowblockslocal()].get();
269 }
270 const BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) const {
271 assert(i < rowblockslocal() && j < colblockslocal());
272 return *blocks_[i+j*rowblockslocal()].get();
273 }
274
275 DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) {
276 assert(i < rowblockslocal() && j < colblockslocal());
277 assert(dynamic_cast<DenseTile<scalar_t>*>
278 (blocks_[i+j*rowblockslocal()].get()));
279 return *static_cast<DenseTile<scalar_t>*>
280 (blocks_[i+j*rowblockslocal()].get());
281 }
282 const DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) const {
283 assert(i < rowblockslocal() && j < colblockslocal());
284 assert(dynamic_cast<const DenseTile<scalar_t>*>
285 (blocks_[i+j*rowblockslocal()].get()));
286 return *static_cast<const DenseTile<scalar_t>*>
287 (blocks_[i+j*rowblockslocal()].get());
288 }
289
290 std::unique_ptr<BLRTile<scalar_t>>&
291 block(std::size_t i, std::size_t j) {
292 assert(i < rowblocks() && j < colblocks());
293 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
294 }
295 const std::unique_ptr<BLRTile<scalar_t>>&
296 block(std::size_t i, std::size_t j) const {
297 assert(i < rowblocks() && j < colblocks());
298 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
299 }
300
301 std::unique_ptr<BLRTile<scalar_t>>&
302 lblock(std::size_t i, std::size_t j) {
303 assert(i < rowblockslocal() && j < colblockslocal());
304 return blocks_[i+j*rowblockslocal()];
305 }
306 const std::unique_ptr<BLRTile<scalar_t>>&
307 lblock(std::size_t i, std::size_t j) const {
308 assert(i < rowblockslocal() && j < colblockslocal());
309 return blocks_[i+j*rowblockslocal()];
310 }
311
312 void compress_tile(std::size_t i, std::size_t j, const Opts_t& opts);
313
314 DenseTile<scalar_t>
315 bcast_dense_tile_along_col(std::size_t i, std::size_t j) const;
316 DenseTile<scalar_t>
317 bcast_dense_tile_along_row(std::size_t i, std::size_t j) const;
318
319 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
320 bcast_row_of_tiles_along_cols(std::size_t i,
321 std::size_t j0, std::size_t j1) const;
322 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
323 bcast_col_of_tiles_along_rows(std::size_t i0, std::size_t i1,
324 std::size_t j) const;
325
326#if defined(STRUMPACK_USE_GPU)
327 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
328 bcast_row_of_tiles_along_cols_gpu(std::size_t i,
329 std::size_t j0, std::size_t j1,
330 scalar_t* dptr, scalar_t* pinned,
331 bool gpu_aware) const;
332 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
333 bcast_col_of_tiles_along_rows_gpu(std::size_t i0, std::size_t i1,
334 std::size_t j,
335 scalar_t* dptr, scalar_t* pinned,
336 bool gpu_aware) const;
337#endif
338
339 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
340 gather_rows(std::size_t i0, std::size_t i1,
341 std::size_t j0, std::size_t j1) const;
342
343 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
344 gather_cols(std::size_t i0, std::size_t i1,
345 std::size_t j0, std::size_t j1) const;
346
347
348 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
349 send_row_of_tiles(std::size_t src_row, std::size_t dest_row,
350 std::size_t j0, std::size_t j1) const;
351 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
352 send_col_of_tiles(std::size_t src_col, std::size_t dest_col,
353 std::size_t i0, std::size_t i1) const;
354
355 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
356 gather_row(std::size_t i0, std::size_t k,
357 std::size_t j0, std::size_t j1) const;
358
359 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
360 gather_col(std::size_t i0, std::size_t i1,
361 std::size_t j0, std::size_t k) const;
362
363 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
364 gather_rows_A22(std::size_t i1, std::size_t j1) const;
365
366 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
367 gather_cols_A22(std::size_t i1, std::size_t j1) const;
368
369 template<typename T> friend void
370 trsv(UpLo ul, Trans ta, Diag d, const BLRMatrixMPI<T>& a,
371 BLRMatrixMPI<T>& b);
372 template<typename T> friend void
373 gemv(Trans ta, T alpha, const BLRMatrixMPI<T>& a,
374 const BLRMatrixMPI<T>& x, T beta, BLRMatrixMPI<T>& y);
375 template<typename T> friend void
376 trsm(Side s, UpLo ul, Trans ta, Diag d, T alpha,
377 const BLRMatrixMPI<T>& a, BLRMatrixMPI<T>& b);
378 template<typename T> friend void
379 gemm(Trans ta, Trans tb, T alpha, const BLRMatrixMPI<T>& a,
380 const BLRMatrixMPI<T>& b, T beta, BLRMatrixMPI<T>& c);
381
382#if defined(STRUMPACK_USE_GPU)
383 void move_to_gpu(gpu::Stream& s, scalar_t* dptr, scalar_t* pinned);
384 void move_to_cpu(gpu::Stream& s, scalar_t* pinned);
385 void move_row_to_cpu(int i, gpu::Stream& s, scalar_t* pinned);
386 void move_col_to_cpu(int j, gpu::Stream& s, scalar_t* pinned);
387#endif
388
389 // suppress warnings
390 using structured::StructuredMatrix<scalar_t>::factor;
391
392 template<typename T,typename I> friend class strumpack::ExtendAdd;
393 template<typename T,typename I> friend class BLRExtendAdd;
394 };
395
396 template<typename scalar_t> void
397 LUAR(std::size_t kmax, std::size_t lk,
398 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
399 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
400 DenseMatrix<scalar_t>& tij, const BLROptions<scalar_t>& opts,
401 std::size_t tmp);
402
403 template<typename scalar_t> void
404 LUAR_A22(std::size_t kmax, std::size_t lj, std::size_t lk,
405 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Ti,
406 std::vector<std::unique_ptr<BLRTile<scalar_t>>>& Tj,
407 DenseMatrix<scalar_t>& tij, const BLROptions<scalar_t>& opts,
408 std::size_t tmp);
409
410 template<typename scalar_t> void
411 trsv(UpLo ul, Trans ta, Diag d, const BLRMatrixMPI<scalar_t>& a,
412 BLRMatrixMPI<scalar_t>& b);
413 template<typename scalar_t> void
414 gemv(Trans ta, scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
415 const BLRMatrixMPI<scalar_t>& x, scalar_t beta,
416 BLRMatrixMPI<scalar_t>& y);
417
418 template<typename scalar_t> void
419 trsm(Side s, UpLo ul, Trans ta, Diag d,
420 scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
421 BLRMatrixMPI<scalar_t>& b);
422 template<typename scalar_t> void
423 gemm(Trans ta, Trans tb, scalar_t alpha, const BLRMatrixMPI<scalar_t>& a,
424 const BLRMatrixMPI<scalar_t>& b, scalar_t beta,
425 BLRMatrixMPI<scalar_t>& c);
426
427 } // end namespace BLR
428} // end namespace strumpack
429
430#endif // BLR_MATRIX_MPI_HPP
Contains the BLRMatrix class.
Contains the DistributedMatrix and DistributedMatrixWrapper classes, wrappers around ScaLAPACK/PBLAS ...
Distributed memory block low rank matrix.
Definition BLRMatrixMPI.hpp:121
scalar_t operator()(std::size_t i, std::size_t j) const
std::size_t rank() const override
std::size_t rows() const override
Definition BLRMatrixMPI.hpp:137
std::size_t cols() const override
Definition BLRMatrixMPI.hpp:138
std::size_t memory() const override
std::size_t nonzeros() const override
const scalar_t & global(std::size_t i, std::size_t j) const
Definition BLRMatrix.hpp:50
Representation of a 2D processor grid, similar to a BLACS grid, but not requiring ScaLAPACK.
Definition BLRMatrixMPI.hpp:53
ProcessorGrid2D(const MPIComm &comm)
ProcessorGrid2D(const MPIComm &comm, int P)
Definition BLRMatrixMPI.hpp:42
Definition BLRMatrixMPI.hpp:43
Wrapper class around an MPI_Comm object.
Definition MPIWrapper.hpp:173
bool is_root() const
Definition MPIWrapper.hpp:272
Class to represent a structured matrix. This is the abstract base class for several types of structur...
Definition StructuredMatrix.hpp:209
Definition StrumpackOptions.hpp:44
UpLo
Definition DenseMatrix.hpp:83
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
Side
Definition DenseMatrix.hpp:74
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
Diag
Definition DenseMatrix.hpp:93