HSSExtra.hpp
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  */
28 #ifndef HSS_EXTRA_HPP
29 #define HSS_EXTRA_HPP
30 
31 #include "dense/DenseMatrix.hpp"
32 
33 namespace strumpack {
34  namespace HSS {
35 
45  enum class State : char
46  {UNTOUCHED='U',
52  COMPRESSED='C'
54  };
55 
56 #ifndef DOXYGEN_SHOULD_SKIP_THIS
57  template<typename T> std::pair<T,T>
58  operator+(const std::pair<T,T>& l, const std::pair<T,T>& r) {
59  return {l.first+r.first, l.second+r.second};
60  }
61 #endif // DOXYGEN_SHOULD_SKIP_THIS
62 
63 
64 #ifndef DOXYGEN_SHOULD_SKIP_THIS
65  template<typename scalar_t> class WorkCompressBase {
66  public:
67  WorkCompressBase() {}
68  virtual ~WorkCompressBase() {}
69  virtual void split(const std::pair<std::size_t,std::size_t>& dim) = 0;
70 
71  std::vector<std::size_t> Ir, Ic, Jr, Jc;
72  std::pair<std::size_t,std::size_t> offset;
73  int lvl = 0;
74  scalar_t U_r_max, V_r_max;
75  };
76 
77  template<typename scalar_t> class WorkCompress :
78  public WorkCompressBase<scalar_t> {
79  public:
80  std::vector<WorkCompress<scalar_t>> c;
81  // only needed in the new compression algorithm
82  DenseMatrix<scalar_t> Qr, Qc;
83  void split(const std::pair<std::size_t,std::size_t>& dim) {
84  if (c.empty()) {
85  c.resize(2);
86  c[0].offset = this->offset;
87  c[1].offset = this->offset + dim;
88  c[0].lvl = c[1].lvl = this->lvl + 1;
89  }
90  }
91  };
92 
93 
94  template<typename scalar_t,
95  typename real_t=typename RealType<scalar_t>::value_type>
96  class WorkCompressANN :
97  public WorkCompressBase<scalar_t> {
98  public:
99  std::vector<WorkCompressANN<scalar_t>> c;
100  DenseMatrix<scalar_t> S;
101  std::vector<std::pair<std::size_t,real_t>> ids_scores;
102  void split(const std::pair<std::size_t,std::size_t>& dim) {
103  if (c.empty()) {
104  c.resize(2);
105  c[0].offset = this->offset;
106  c[1].offset = this->offset + dim;
107  c[0].lvl = c[1].lvl = this->lvl + 1;
108  }
109  }
110  };
111 
112  template<typename scalar_t> class WorkApply {
113  public:
114  std::pair<std::size_t,std::size_t> offset;
115  std::vector<WorkApply<scalar_t>> c;
116  DenseMatrix<scalar_t> tmp1, tmp2;
117  };
118 
119  template<typename scalar_t> class WorkExtract {
120  public:
121  std::vector<WorkExtract<scalar_t>> c;
122  DenseMatrix<scalar_t> y, z;
123  std::vector<std::size_t> I, J, rl2g, cl2g, ycols, zcols;
124  void split_extraction_sets
125  (const std::pair<std::size_t,std::size_t>& dim) {
126  if (c.empty()) {
127  c.resize(2);
128  c[0].I.reserve(I.size());
129  c[1].I.reserve(I.size());
130  for (auto i : I)
131  if (i < dim.first) c[0].I.push_back(i);
132  else c[1].I.push_back(i - dim.first);
133  c[0].J.reserve(J.size());
134  c[1].J.reserve(J.size());
135  for (auto j : J)
136  if (j < dim.second) c[0].J.push_back(j);
137  else c[1].J.push_back(j - dim.second);
138  }
139  }
140  };
141 
142  template<typename scalar_t> class WorkFactor {
143  public:
144  std::vector<WorkFactor<scalar_t>> c;
145  DenseMatrix<scalar_t> Dt; // (U.cols x U.cols) \tilde(D)
146  DenseMatrix<scalar_t> Vt1; // (U.cols x V.cols) bottom part of \tilde{V}
147  };
148 #endif // DOXYGEN_SHOULD_SKIP_THIS
149 
150 
161  template<typename scalar_t> class HSSFactors {
162  public:
163 
171  std::size_t memory() {
172  std::size_t mem = sizeof(*this) + _L.memory() + _Vt0.memory()
173  + _W1.memory() + _Q.memory() + _D.memory()
174  + sizeof(int)*_piv.size();
175  for (auto& c : _ch) mem += c.memory();
176  return mem;
177  }
178 
185  std::size_t nonzeros() const {
186  std::size_t nnz = _L.nonzeros() + _Vt0.nonzeros() + _W1.nonzeros()
187  + _Q.nonzeros() + _D.nonzeros();
188  for (auto& c : _ch) nnz += c.nonzeros();
189  return nnz;
190  }
191 
195  const DenseMatrix<scalar_t>& Vhat() const { return _Vt0; }
199  DenseMatrix<scalar_t>& Vhat() { return _Vt0; }
200 
201  private:
202  std::vector<HSSFactors<scalar_t>> _ch;
203  DenseMatrix<scalar_t> _L; // (U.rows-U.cols x U.rows-U.cols),
204  // empty at the root
205  DenseMatrix<scalar_t> _Vt0; // (U.rows-U.cols x V.cols)
206  // at the root, _Vt0 stored Vhat
207  DenseMatrix<scalar_t> _W1; // (U.cols x U.rows) bottom part of W
208  // if (U.rows == U.cols)
209  // then W == I and is not stored!
210  DenseMatrix<scalar_t> _Q; // (U.rows x U.rows) Q from LQ(W0)
211  // if (U.rows == U.cols)
212  // then Q == I and is not stored!
213  DenseMatrix<scalar_t> _D; // (U.rows x U.rows) at the root holds LU(D)
214  // else empty
215  std::vector<int> _piv; // hold permutation from LU(D) at root
216  template<typename T> friend class HSSMatrix;
217  template<typename T> friend class HSSMatrixBase;
218  };
219 
220 #ifndef DOXYGEN_SHOULD_SKIP_THIS
221  template<typename scalar_t> class WorkSolve {
222  public:
223  std::vector<WorkSolve<scalar_t>> c;
224 
225  // do we need all these?? x only used in bwd, y only used in fwd??
230 
231  // DO NOT STORE reduced_rhs here!!!
232  DenseMatrix<scalar_t> reduced_rhs;
233  std::pair<std::size_t,std::size_t> offset;
234  };
235 #endif // DOXYGEN_SHOULD_SKIP_THIS
236 
237 
238 #ifndef DOXYGEN_SHOULD_SKIP_THIS
239  template<typename scalar_t> class AFunctor {
240  using DenseM_t = DenseMatrix<scalar_t>;
241  public:
242  AFunctor(const DenseM_t& A) : _A(A) {}
243  const DenseM_t& _A;
244  void operator()
245  (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc) {
246  gemm(Trans::N, Trans::N, scalar_t(1.), _A, Rr, scalar_t(0.), Sr);
247  gemm(Trans::C, Trans::N, scalar_t(1.), _A, Rc, scalar_t(0.), Sc);
248  }
249  void operator()(const std::vector<size_t>& I,
250  const std::vector<size_t>& J, DenseM_t& B) {
251  assert(I.size() == B.rows() && J.size() == B.cols());
252  for (std::size_t j=0; j<J.size(); j++)
253  for (std::size_t i=0; i<I.size(); i++) {
254  assert(I[i] >= 0 && I[i] < _A.rows() &&
255  J[j] >= 0 && J[j] < _A.cols());
256  B(i,j) = _A(I[i], J[j]);
257  }
258  }
259  };
260 #endif // DOXYGEN_SHOULD_SKIP_THIS
261 
262 
263 #ifndef DOXYGEN_SHOULD_SKIP_THIS
264  template<typename scalar_t> class WorkDense {
265  public:
266  std::pair<std::size_t,std::size_t> offset;
267  std::vector<WorkDense<scalar_t>> c;
268  DenseMatrix<scalar_t> tmpU, tmpV;
269  };
270 #endif // DOXYGEN_SHOULD_SKIP_THIS
271 
272  } // end namespace HSS
273 } // end namespace strumpack
274 
275 #endif // HSS_EXTRA_HPP
strumpack::Trans::N
@ N
strumpack::Trans::C
@ C
DenseMatrix.hpp
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
strumpack::HSS::HSSFactors::Vhat
const DenseMatrix< scalar_t > & Vhat() const
Definition: HSSExtra.hpp:195
strumpack::DenseMatrix::nonzeros
virtual std::size_t nonzeros() const
Definition: DenseMatrix.hpp:659
strumpack::HSS::State::PARTIALLY_COMPRESSED
@ PARTIALLY_COMPRESSED
strumpack
Definition: StrumpackOptions.hpp:42
strumpack::HSS::HSSFactors::Vhat
DenseMatrix< scalar_t > & Vhat()
Definition: HSSExtra.hpp:199
strumpack::DenseMatrix< scalar_t >
strumpack::HSS::HSSFactors
Contains data related to ULV factorization of an HSS matrix.
Definition: HSSExtra.hpp:161
strumpack::HSS::State::COMPRESSED
@ COMPRESSED
strumpack::DenseMatrix::memory
virtual std::size_t memory() const
Definition: DenseMatrix.hpp:651
strumpack::HSS::HSSFactors::memory
std::size_t memory()
Definition: HSSExtra.hpp:171
strumpack::HSS::HSSMatrix
Class to represent a sequential/threaded Hierarchically Semi-Separable matrix.
Definition: HSSMatrix.hpp:76
strumpack::HSS::State::UNTOUCHED
@ UNTOUCHED
strumpack::HSS::HSSFactors::nonzeros
std::size_t nonzeros() const
Definition: HSSExtra.hpp:185
strumpack::HSS::State
State
Definition: HSSExtra.hpp:45
strumpack::CompressionType::HSS
@ HSS
strumpack::gemm
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
strumpack::HSS::HSSMatrixBase
Abstract base class for Hierarchically Semi-Separable (HSS) matrices.
Definition: HSSMatrixBase.hpp:81