HSSMatrixBase.hpp
Go to the documentation of this file.
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  *
28  */
34 #ifndef HSS_MATRIX_BASE_HPP
35 #define HSS_MATRIX_BASE_HPP
36 
37 #include <cassert>
38 #include <iostream>
39 #include <fstream>
40 #include <string>
41 #include <vector>
42 #include <functional>
43 
44 #include "dense/DenseMatrix.hpp"
45 #include "misc/Triplet.hpp"
46 #include "HSSOptions.hpp"
47 #include "HSSExtra.hpp"
48 #if defined(STRUMPACK_USE_MPI)
49 #include "dense/DistributedMatrix.hpp"
50 #include "HSSExtraMPI.hpp"
51 #include "HSSMatrixMPI.hpp"
52 #endif
53 
54 namespace strumpack {
55  namespace HSS {
56 
// Forward declarations of class templates that HSSMatrixBase refers to
// before their full definitions are needed: HSSMatrix and HSSMatrixMPI
// are named as friends of HSSMatrixBase, and DistSubLeaf / DistSamples
// appear in the signatures of its MPI-only virtual functions. The whole
// group is excluded from the generated documentation through the
// DOXYGEN_SHOULD_SKIP_THIS guard; the MPI-related templates are only
// declared when STRUMPACK_USE_MPI is defined.
57 #ifndef DOXYGEN_SHOULD_SKIP_THIS
58  template<typename scalar_t> class HSSMatrix;
59 #if defined(STRUMPACK_USE_MPI)
60  template<typename scalar_t> class HSSMatrixMPI;
61  template<typename scalar_t> class DistSubLeaf;
62  template<typename scalar_t> class DistSamples;
63 #endif //defined(STRUMPACK_USE_MPI)
64 #endif //DOXYGEN_SHOULD_SKIP_THIS
65 
66 
81  template<typename scalar_t> class HSSMatrixBase {
82  using real_t = typename RealType<scalar_t>::value_type;
85  using elem_t = typename std::function
86  <void(const std::vector<std::size_t>& I,
87  const std::vector<std::size_t>& J, DenseM_t& B)>;
89 #if defined(STRUMPACK_USE_MPI)
92  using delem_t = typename std::function
93  <void(const std::vector<std::size_t>& I,
94  const std::vector<std::size_t>& J, DistM_t& B)>;
95 #endif //defined(STRUMPACK_USE_MPI)
96 
97  public:
106  HSSMatrixBase(std::size_t m, std::size_t n, bool active);
107 
111  virtual ~HSSMatrixBase() = default;
112 
118 
125 
130  HSSMatrixBase(HSSMatrixBase&& h) = default;
131 
137  HSSMatrixBase& operator=(HSSMatrixBase&& h) = default;
138 
145  virtual std::unique_ptr<HSSMatrixBase<scalar_t>> clone() const = 0;
146 
153  std::pair<std::size_t,std::size_t> dims() const {
154  return std::make_pair(_rows, _cols);
155  }
156 
161  std::size_t rows() const { return _rows; }
162 
167  std::size_t cols() const { return _cols; }
168 
173  bool leaf() const { return _ch.empty(); }
174 
184  const HSSMatrixBase<scalar_t>& child(int c) const {
185  assert(c>=0 && c<int(_ch.size())); return *(_ch[c]);
186  }
187 
198  assert(c>=0 && c<int(_ch.size())); return *(_ch[c]);
199  }
200 
209  bool is_compressed() const {
210  return _U_state == State::COMPRESSED &&
211  _V_state == State::COMPRESSED;
212  }
213 
224  bool is_untouched() const {
225  return _U_state == State::UNTOUCHED &&
226  _V_state == State::UNTOUCHED;
227  }
228 
235  bool active() const { return _active; }
236 
243  virtual std::size_t rank() const = 0;
244 
253  virtual std::size_t memory() const = 0;
254 
262  virtual std::size_t nonzeros() const = 0;
263 
269  virtual std::size_t levels() const = 0;
270 
283  virtual void print_info
284  (std::ostream &out=std::cout,
285  std::size_t roff=0, std::size_t coff=0) const = 0;
286 
296  void set_openmp_task_depth(int depth) { _openmp_task_depth = depth; }
297 
298 #ifndef DOXYGEN_SHOULD_SKIP_THIS
299  virtual void delete_trailing_block() { if (_ch.size()==2) _ch.resize(1); }
300  virtual void reset() {
301  _U_state = _V_state = State::UNTOUCHED;
302  _U_rank = _U_rows = _V_rank = _V_rows = 0;
303  for (auto& c : _ch) c->reset();
304  }
305 #endif
306 
314  virtual void shift(scalar_t sigma) = 0;
315 
322  virtual void draw
323  (std::ostream& of, std::size_t rlo, std::size_t clo) const {}
324 
325 #if defined(STRUMPACK_USE_MPI)
326  virtual void forward_solve
327  (const HSSFactorsMPI<scalar_t>& ULV, WorkSolveMPI<scalar_t>& w,
328  const DistM_t& b, bool partial) const;
329  virtual void backward_solve
330  (const HSSFactorsMPI<scalar_t>& ULV, WorkSolveMPI<scalar_t>& w,
331  DistM_t& x) const;
332 
333  virtual const BLACSGrid* grid() const { return nullptr; }
334  virtual const BLACSGrid* grid(const BLACSGrid* local_grid) const {
335  return active() ? local_grid : nullptr;
336  }
337  virtual const BLACSGrid* grid_local() const { return nullptr; }
338  virtual int Ptotal() const { return 1; }
339  virtual int Pactive() const { return 1; }
340 
341  virtual void to_block_row
342  (const DistM_t& A, DenseM_t& sub_A, DistM_t& leaf_A) const;
343  virtual void allocate_block_row
344  (int d, DenseM_t& sub_A, DistM_t& leaf_A) const;
345  virtual void from_block_row
346  (DistM_t& A, const DenseM_t& sub_A, const DistM_t& leaf_A,
347  const BLACSGrid* lg) const;
348 #endif //defined(STRUMPACK_USE_MPI)
349 
350  protected:
351  std::size_t _rows, _cols;
352 
353  // TODO store children array in the sub-class???
354  std::vector<std::unique_ptr<HSSMatrixBase<scalar_t>>> _ch;
355  State _U_state, _V_state;
356  int _openmp_task_depth;
357  bool _active;
358 
359  int _U_rank = 0, _U_rows = 0, _V_rank = 0, _V_rows = 0;
360  virtual std::size_t U_rank() const { return _U_rank; }
361  virtual std::size_t V_rank() const { return _V_rank; }
362  virtual std::size_t U_rows() const { return _U_rows; }
363  virtual std::size_t V_rows() const { return _V_rows; }
364 
365  // Used to redistribute the original 2D block cyclic matrix
366  // according to the HSS tree
367  DenseM_t _Asub;
368 
369  virtual void compress_recursive_original
370  (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc,
371  const elem_t& Aelem, const opts_t& opts, WorkCompress<scalar_t>& w,
372  int dd, int depth) {}
373  virtual void compress_recursive_stable
374  (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc,
375  const elem_t& Aelem, const opts_t& opts, WorkCompress<scalar_t>& w,
376  int d, int dd, int depth) {}
377  virtual void compress_level_original
378  (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc,
379  const opts_t& opts, WorkCompress<scalar_t>& w,
380  int dd, int lvl, int depth) {}
381  virtual void compress_level_stable
382  (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc,
383  const opts_t& opts, WorkCompress<scalar_t>& w,
384  int d, int dd, int lvl, int depth) {}
385  virtual void compress_recursive_ann
386  (DenseMatrix<std::uint32_t>& ann, DenseMatrix<real_t>& scores,
387  const elem_t& Aelem, const opts_t& opts,
388  WorkCompressANN<scalar_t>& w, int depth) {}
389 
390  virtual void get_extraction_indices
391  (std::vector<std::vector<std::size_t>>& I,
392  std::vector<std::vector<std::size_t>>& J,
393  const std::pair<std::size_t,std::size_t>& off,
394  WorkCompress<scalar_t>& w, int& self, int lvl) {}
395 
396  virtual void get_extraction_indices
397  (std::vector<std::vector<std::size_t>>& I,
398  std::vector<std::vector<std::size_t>>& J,
399  std::vector<DenseM_t*>& B,
400  const std::pair<std::size_t,std::size_t>& off,
401  WorkCompress<scalar_t>& w, int& self, int lvl) {}
402  virtual void extract_D_B
403  (const elem_t& Aelem, const opts_t& opts,
404  WorkCompress<scalar_t>& w, int lvl) {}
405 
406  virtual void factor_recursive
407  (HSSFactors<scalar_t>& ULV, WorkFactor<scalar_t>& w,
408  bool isroot, bool partial, int depth) const {}
409 
410  virtual void apply_fwd
411  (const DenseM_t& b, WorkApply<scalar_t>& w,
412  bool isroot, int depth, std::atomic<long long int>& flops) const {}
413  virtual void apply_bwd
414  (const DenseM_t& b, scalar_t beta, DenseM_t& c, WorkApply<scalar_t>& w,
415  bool isroot, int depth, std::atomic<long long int>& flops) const {}
416  virtual void applyT_fwd
417  (const DenseM_t& b, WorkApply<scalar_t>& w, bool isroot,
418  int depth, std::atomic<long long int>& flops) const {}
419  virtual void applyT_bwd
420  (const DenseM_t& b, scalar_t beta, DenseM_t& c, WorkApply<scalar_t>& w,
421  bool isroot, int depth, std::atomic<long long int>& flops) const {}
422 
423  virtual void forward_solve
424  (const HSSFactors<scalar_t>& ULV, WorkSolve<scalar_t>& w,
425  const DenseMatrix<scalar_t>& b, bool partial) const {}
426  virtual void backward_solve
427  (const HSSFactors<scalar_t>& ULV, WorkSolve<scalar_t>& w,
428  DenseMatrix<scalar_t>& b) const {}
429  virtual void solve_fwd
430  (const HSSFactors<scalar_t>& ULV, const DenseM_t& b,
431  WorkSolve<scalar_t>& w, bool partial, bool isroot, int depth) const {}
432  virtual void solve_bwd
433  (const HSSFactors<scalar_t>& ULV, DenseM_t& x, WorkSolve<scalar_t>& w,
434  bool isroot, int depth) const {}
435 
436  virtual void extract_fwd
437  (WorkExtract<scalar_t>& w, bool odiag, int depth) const {}
438  virtual void extract_bwd
439  (DenseMatrix<scalar_t>& B, WorkExtract<scalar_t>& w,
440  int depth) const {}
441  virtual void extract_bwd
442  (std::vector<Triplet<scalar_t>>& triplets,
443  WorkExtract<scalar_t>& w, int depth) const {}
444 
445  virtual void apply_UV_big
446  (DenseM_t& Theta, DenseM_t& Uop, DenseM_t& Phi, DenseM_t& Vop,
447  const std::pair<std::size_t, std::size_t>& offset, int depth,
448  std::atomic<long long int>& flops) const {}
449  virtual void apply_UtVt_big
450  (const DenseM_t& A, DenseM_t& UtA, DenseM_t& VtA,
451  const std::pair<std::size_t, std::size_t>& offset,
452  int depth, std::atomic<long long int>& flops) const {}
453 
454  virtual void dense_recursive
455  (DenseM_t& A, WorkDense<scalar_t>& w, bool isroot, int depth) const {}
456 
457  virtual void read(std::ifstream& os) {
458  std::cerr << "ERROR read_HSS_node not implemented" << std::endl;
459  }
460  virtual void write(std::ofstream& os) const {
461  std::cerr << "ERROR write_HSS_node not implemented" << std::endl;
462  }
463 
464  friend class HSSMatrix<scalar_t>;
465 
466 #if defined(STRUMPACK_USE_MPI)
467  using delemw_t = typename std::function
468  <void(const std::vector<std::size_t>& I,
469  const std::vector<std::size_t>& J,
470  DistM_t& B, DistM_t& A,
471  std::size_t rlo, std::size_t clo,
472  MPI_Comm comm)>;
473 
474  virtual void compress_recursive_original
475  (DistSamples<scalar_t>& RS, const delemw_t& Aelem,
476  const opts_t& opts, WorkCompressMPI<scalar_t>& w, int dd);
477  virtual void compress_recursive_stable
478  (DistSamples<scalar_t>& RS, const delemw_t& Aelem,
479  const opts_t& opts, WorkCompressMPI<scalar_t>& w, int d, int dd);
480  virtual void compress_level_original
481  (DistSamples<scalar_t>& RS, const opts_t& opts,
482  WorkCompressMPI<scalar_t>& w, int dd, int lvl);
483  virtual void compress_level_stable
484  (DistSamples<scalar_t>& RS, const opts_t& opts,
485  WorkCompressMPI<scalar_t>& w, int d, int dd, int lvl);
486  virtual void compress_recursive_ann
487  (DenseMatrix<std::uint32_t>& ann, DenseMatrix<real_t>& scores,
488  const delemw_t& Aelem, WorkCompressMPIANN<scalar_t>& w,
489  const opts_t& opts, const BLACSGrid* lg);
490 
491  virtual void get_extraction_indices
492  (std::vector<std::vector<std::size_t>>& I,
493  std::vector<std::vector<std::size_t>>& J,
494  WorkCompressMPI<scalar_t>& w, int& self, int lvl);
495  virtual void get_extraction_indices
496  (std::vector<std::vector<std::size_t>>& I,
497  std::vector<std::vector<std::size_t>>& J, std::vector<DistMW_t>& B,
498  const BLACSGrid* lg, WorkCompressMPI<scalar_t>& w, int& self, int lvl);
499  virtual void extract_D_B
500  (const delemw_t& Aelem, const BLACSGrid* lg, const opts_t& opts,
501  WorkCompressMPI<scalar_t>& w, int lvl);
502 
503  virtual void apply_fwd
504  (const DistSubLeaf<scalar_t>& B, WorkApplyMPI<scalar_t>& w,
505  bool isroot, long long int flops) const;
506  virtual void apply_bwd
507  (const DistSubLeaf<scalar_t>& B, scalar_t beta,
508  DistSubLeaf<scalar_t>& C, WorkApplyMPI<scalar_t>& w,
509  bool isroot, long long int flops) const;
510  virtual void applyT_fwd
511  (const DistSubLeaf<scalar_t>& B, WorkApplyMPI<scalar_t>& w,
512  bool isroot, long long int flops) const;
513  virtual void applyT_bwd
514  (const DistSubLeaf<scalar_t>& B, scalar_t beta,
515  DistSubLeaf<scalar_t>& C, WorkApplyMPI<scalar_t>& w,
516  bool isroot, long long int flops) const;
517 
518  virtual void factor_recursive
519  (HSSFactorsMPI<scalar_t>& ULV, WorkFactorMPI<scalar_t>& w,
520  const BLACSGrid* lg, bool isroot, bool partial) const;
521 
522  virtual void solve_fwd
523  (const HSSFactorsMPI<scalar_t>& ULV, const DistSubLeaf<scalar_t>& b,
524  WorkSolveMPI<scalar_t>& w, bool partial, bool isroot) const;
525  virtual void solve_bwd
526  (const HSSFactorsMPI<scalar_t>& ULV, DistSubLeaf<scalar_t>& x,
527  WorkSolveMPI<scalar_t>& w, bool isroot) const;
528 
529  virtual void extract_fwd
530  (WorkExtractMPI<scalar_t>& w, const BLACSGrid* lg, bool odiag) const;
531  virtual void extract_bwd
532  (std::vector<Triplet<scalar_t>>& triplets,
533  const BLACSGrid* lg, WorkExtractMPI<scalar_t>& w) const;
534  virtual void extract_fwd
535  (WorkExtractBlocksMPI<scalar_t>& w, const BLACSGrid* lg,
536  std::vector<bool>& odiag) const;
537  virtual void extract_bwd
538  (std::vector<std::vector<Triplet<scalar_t>>>& triplets,
539  const BLACSGrid* lg, WorkExtractBlocksMPI<scalar_t>& w) const;
540 
541  virtual void apply_UV_big
542  (DistSubLeaf<scalar_t>& Theta, DistM_t& Uop,
543  DistSubLeaf<scalar_t>& Phi, DistM_t& Vop,
544  long long int& flops) const;
545 
546  virtual void redistribute_to_tree_to_buffers
547  (const DistM_t& A, std::size_t Arlo, std::size_t Aclo,
548  std::vector<std::vector<scalar_t>>& sbuf, int dest);
549  virtual void redistribute_to_tree_from_buffers
550  (const DistM_t& A, std::size_t Arlo, std::size_t Aclo,
551  std::vector<scalar_t*>& pbuf);
552  virtual void delete_redistributed_input();
553 
554  friend class HSSMatrixMPI<scalar_t>;
555 #endif //defined(STRUMPACK_USE_MPI)
556  };
557 
558  } // end namespace HSS
559 } // end namespace strumpack
560 
561 
562 #endif // HSS_MATRIX_BASE_HPP
strumpack::BLACSGrid
This is a small wrapper class around a BLACS grid and a BLACS context.
Definition: BLACSGrid.hpp:66
strumpack::HSS::HSSMatrixBase::~HSSMatrixBase
virtual ~HSSMatrixBase()=default
strumpack::HSS::HSSMatrixBase::clone
virtual std::unique_ptr< HSSMatrixBase< scalar_t > > clone() const =0
strumpack::HSS::HSSMatrixBase::leaf
bool leaf() const
Definition: HSSMatrixBase.hpp:173
strumpack::HSS::HSSMatrixBase::HSSMatrixBase
HSSMatrixBase(std::size_t m, std::size_t n, bool active)
strumpack::HSS::HSSOptions
Class containing several options for the HSS code and data-structures.
Definition: HSSOptions.hpp:117
strumpack::HSS::HSSMatrixBase::draw
virtual void draw(std::ostream &of, std::size_t rlo, std::size_t clo) const
Definition: HSSMatrixBase.hpp:323
strumpack::HSS::HSSMatrixBase::rank
virtual std::size_t rank() const =0
DenseMatrix.hpp
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style dense matrices.
strumpack
Definition: StrumpackOptions.hpp:42
strumpack::DistributedMatrix
Definition: CompressedSparseMatrix.hpp:58
strumpack::HSS::HSSMatrixBase::print_info
virtual void print_info(std::ostream &out=std::cout, std::size_t roff=0, std::size_t coff=0) const =0
strumpack::DenseMatrixWrapper
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines.
Definition: DenseMatrix.hpp:991
strumpack::HSS::HSSMatrixBase::levels
virtual std::size_t levels() const =0
strumpack::HSS::HSSMatrixBase::shift
virtual void shift(scalar_t sigma)=0
strumpack::HSS::HSSMatrixBase::rows
std::size_t rows() const
Definition: HSSMatrixBase.hpp:161
strumpack::DenseMatrix< scalar_t >
strumpack::DistributedMatrixWrapper
Definition: DistributedMatrix.hpp:277
strumpack::HSS::State::COMPRESSED
@ COMPRESSED
strumpack::HSS::HSSMatrixBase::nonzeros
virtual std::size_t nonzeros() const =0
strumpack::HSS::HSSMatrixBase::is_untouched
bool is_untouched() const
Definition: HSSMatrixBase.hpp:224
strumpack::HSS::HSSFactorsMPI
Contains data related to ULV factorization of a distributed HSS matrix.
Definition: HSSExtraMPI.hpp:184
strumpack::HSS::HSSMatrixBase::set_openmp_task_depth
void set_openmp_task_depth(int depth)
Definition: HSSMatrixBase.hpp:296
strumpack::HSS::HSSMatrixBase::active
bool active() const
Definition: HSSMatrixBase.hpp:235
strumpack::HSS::HSSMatrixBase::cols
std::size_t cols() const
Definition: HSSMatrixBase.hpp:167
strumpack::HSS::State::UNTOUCHED
@ UNTOUCHED
HSSOptions.hpp
Contains the HSSOptions class as well as general routines for HSS options.
strumpack::HSS::State
State
Definition: HSSExtra.hpp:45
strumpack::HSS::HSSMatrixBase::dims
std::pair< std::size_t, std::size_t > dims() const
Definition: HSSMatrixBase.hpp:153
strumpack::HSS::HSSMatrixBase::child
const HSSMatrixBase< scalar_t > & child(int c) const
Definition: HSSMatrixBase.hpp:184
strumpack::CompressionType::HSS
@ HSS
strumpack::HSS::HSSMatrixBase::is_compressed
bool is_compressed() const
Definition: HSSMatrixBase.hpp:209
strumpack::HSS::HSSMatrixBase
Abstract base class for Hierarchically Semi-Separable (HSS) matrices.
Definition: HSSMatrixBase.hpp:81
strumpack::HSS::HSSMatrixBase::operator=
HSSMatrixBase< scalar_t > & operator=(const HSSMatrixBase< scalar_t > &other)
strumpack::HSS::HSSMatrixBase::child
HSSMatrixBase< scalar_t > & child(int c)
Definition: HSSMatrixBase.hpp:197
strumpack::HSS::HSSMatrixBase::memory
virtual std::size_t memory() const =0