HSSExtra.hpp
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  */
28 #ifndef HSS_EXTRA_HPP
29 #define HSS_EXTRA_HPP
30 
31 #include "dense/DenseMatrix.hpp"
32 
33 namespace strumpack {
34  namespace HSS {
35 
45  enum class State : char
46  {UNTOUCHED='U',
52  COMPRESSED='C'
54  };
55 
56 #ifndef DOXYGEN_SHOULD_SKIP_THIS
57  template<typename T> std::pair<T,T>
58  operator+(const std::pair<T,T>& l, const std::pair<T,T>& r) {
59  return {l.first+r.first, l.second+r.second};
60  }
61 #endif // DOXYGEN_SHOULD_SKIP_THIS
62 
63 
64 #ifndef DOXYGEN_SHOULD_SKIP_THIS
65  template<typename scalar_t> class WorkCompressBase {
66  public:
67  WorkCompressBase() {}
68  virtual ~WorkCompressBase() {}
69  virtual void split(const std::pair<std::size_t,std::size_t>& dim) = 0;
70 
71  std::vector<std::size_t> Ir, Ic, Jr, Jc;
72  std::pair<std::size_t,std::size_t> offset;
73  int lvl = 0;
74  scalar_t U_r_max, V_r_max;
75  };
76 
77  template<typename scalar_t> class WorkCompress :
78  public WorkCompressBase<scalar_t> {
79  public:
80  std::vector<WorkCompress<scalar_t>> c;
81  // only needed in the new compression algorithm
82  DenseMatrix<scalar_t> Qr, Qc;
83  void split(const std::pair<std::size_t,std::size_t>& dim) {
84  if (c.empty()) {
85  c.resize(2);
86  c[0].offset = this->offset;
87  c[1].offset = this->offset + dim;
88  c[0].lvl = c[1].lvl = this->lvl + 1;
89  }
90  }
91  };
92 
93 
94  template<typename scalar_t,
95  typename real_t=typename RealType<scalar_t>::value_type>
96  class WorkCompressANN :
97  public WorkCompressBase<scalar_t> {
98  public:
99  std::vector<WorkCompressANN<scalar_t>> c;
100  DenseMatrix<scalar_t> S;
101  std::vector<std::pair<std::size_t,real_t>> ids_scores;
102  void split(const std::pair<std::size_t,std::size_t>& dim) {
103  if (c.empty()) {
104  c.resize(2);
105  c[0].offset = this->offset;
106  c[1].offset = this->offset + dim;
107  c[0].lvl = c[1].lvl = this->lvl + 1;
108  }
109  }
110  };
111 
112  template<typename scalar_t> class WorkApply {
113  public:
114  std::pair<std::size_t,std::size_t> offset;
115  std::vector<WorkApply<scalar_t>> c;
116  DenseMatrix<scalar_t> tmp1, tmp2;
117  };
118 
119  template<typename scalar_t> class WorkExtract {
120  public:
121  std::vector<WorkExtract<scalar_t>> c;
122  DenseMatrix<scalar_t> y, z;
123  std::vector<std::size_t> I, J, rl2g, cl2g, ycols, zcols;
124  void split_extraction_sets
125  (const std::pair<std::size_t,std::size_t>& dim) {
126  if (c.empty()) {
127  c.resize(2);
128  c[0].I.reserve(I.size());
129  c[1].I.reserve(I.size());
130  for (auto i : I)
131  if (i < dim.first) c[0].I.push_back(i);
132  else c[1].I.push_back(i - dim.first);
133  c[0].J.reserve(J.size());
134  c[1].J.reserve(J.size());
135  for (auto j : J)
136  if (j < dim.second) c[0].J.push_back(j);
137  else c[1].J.push_back(j - dim.second);
138  }
139  }
140  };
141 
142  template<typename scalar_t> class WorkFactor {
143  public:
144  std::vector<WorkFactor<scalar_t>> c;
145  DenseMatrix<scalar_t> Dt; // (U.cols x U.cols) \tilde(D)
146  DenseMatrix<scalar_t> Vt1; // (U.cols x V.cols) bottom part of \tilde{V}
147  };
148 #endif // DOXYGEN_SHOULD_SKIP_THIS
149 
150 
161  template<typename scalar_t> class HSSFactors {
162  public:
163 
171  std::size_t memory() {
172  return sizeof(*this) + L_.memory() + Vt0_.memory()
173  + W1_.memory() + Q_.memory() + D_.memory()
174  + sizeof(int)*piv_.size();
175  }
176 
183  std::size_t nonzeros() const {
184  return L_.nonzeros() + Vt0_.nonzeros() + W1_.nonzeros()
185  + Q_.nonzeros() + D_.nonzeros();
186  }
187 
191  const DenseMatrix<scalar_t>& Vhat() const { return Vt0_; }
195  DenseMatrix<scalar_t>& Vhat() { return Vt0_; }
196 
197  private:
198  DenseMatrix<scalar_t> L_; // (U.rows-U.cols x U.rows-U.cols),
199  // empty at the root
200  DenseMatrix<scalar_t> Vt0_; // (U.rows-U.cols x V.cols)
201  // at the root, _Vt0 stored Vhat
202  DenseMatrix<scalar_t> W1_; // (U.cols x U.rows) bottom part of W
203  // if (U.rows == U.cols)
204  // then W == I and is not stored!
205  DenseMatrix<scalar_t> Q_; // (U.rows x U.rows) Q from LQ(W0)
206  // if (U.rows == U.cols)
207  // then Q == I and is not stored!
208  DenseMatrix<scalar_t> D_; // (U.rows x U.rows) at the root holds LU(D)
209  // else empty
210  std::vector<int> piv_; // hold permutation from LU(D) at root
211  template<typename T> friend class HSSMatrix;
212  template<typename T> friend class HSSMatrixBase;
213  };
214 
215 #ifndef DOXYGEN_SHOULD_SKIP_THIS
216  template<typename scalar_t> class WorkSolve {
217  public:
218  std::vector<WorkSolve<scalar_t>> c;
219 
220  // do we need all these?? x only used in bwd, y only used in fwd??
221  DenseMatrix<scalar_t> z, ft1, y, x;
222 
223  // DO NOT STORE reduced_rhs here!!!
224  DenseMatrix<scalar_t> reduced_rhs;
225  std::pair<std::size_t,std::size_t> offset;
226  };
227 #endif // DOXYGEN_SHOULD_SKIP_THIS
228 
229 
230 #ifndef DOXYGEN_SHOULD_SKIP_THIS
231  template<typename scalar_t> class AFunctor {
232  using DenseM_t = DenseMatrix<scalar_t>;
233  public:
234  AFunctor(const DenseM_t& A) : _A(A) {}
235  const DenseM_t& _A;
236  void operator()
237  (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc) {
238  gemm(Trans::N, Trans::N, scalar_t(1.), _A, Rr, scalar_t(0.), Sr);
239  gemm(Trans::C, Trans::N, scalar_t(1.), _A, Rc, scalar_t(0.), Sc);
240  }
241  void operator()(const std::vector<size_t>& I,
242  const std::vector<size_t>& J, DenseM_t& B) {
243  assert(I.size() == B.rows() && J.size() == B.cols());
244  for (std::size_t j=0; j<J.size(); j++)
245  for (std::size_t i=0; i<I.size(); i++) {
246  assert(I[i] >= 0 && I[i] < _A.rows() &&
247  J[j] >= 0 && J[j] < _A.cols());
248  B(i,j) = _A(I[i], J[j]);
249  }
250  }
251  };
252 #endif // DOXYGEN_SHOULD_SKIP_THIS
253 
254 
255 #ifndef DOXYGEN_SHOULD_SKIP_THIS
256  template<typename scalar_t> class WorkDense {
257  public:
258  std::pair<std::size_t,std::size_t> offset;
259  std::vector<WorkDense<scalar_t>> c;
260  DenseMatrix<scalar_t> tmpU, tmpV;
261  };
262 #endif // DOXYGEN_SHOULD_SKIP_THIS
263 
264  } // end namespace HSS
265 } // end namespace strumpack
266 
267 #endif // HSS_EXTRA_HPP
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Contains data related to ULV factorization of an HSS matrix.
Definition: HSSExtra.hpp:161
DenseMatrix< scalar_t > & Vhat()
Definition: HSSExtra.hpp:195
const DenseMatrix< scalar_t > & Vhat() const
Definition: HSSExtra.hpp:191
std::size_t memory()
Definition: HSSExtra.hpp:171
std::size_t nonzeros() const
Definition: HSSExtra.hpp:183
Abstract base class for Hierarchically Semi-Separable (HSS) matrices.
Definition: HSSMatrixBase.hpp:83
Class to represent a sequential/threaded Hierarchically Semi-Separable matrix.
Definition: HSSMatrix.hpp:80
State
Definition: HSSExtra.hpp:46
Definition: StrumpackOptions.hpp:43
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)