HSSExtra.hpp
1/*
2 * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3 * Regents of the University of California, through Lawrence Berkeley
4 * National Laboratory (subject to receipt of any required approvals
5 * from the U.S. Dept. of Energy). All rights reserved.
6 *
7 * If you have questions about your rights to use or distribute this
8 * software, please contact Berkeley Lab's Technology Transfer
9 * Department at TTD@lbl.gov.
10 *
11 * NOTICE. This software is owned by the U.S. Department of Energy. As
12 * such, the U.S. Government has been granted for itself and others
13 * acting on its behalf a paid-up, nonexclusive, irrevocable,
14 * worldwide license in the Software to reproduce, prepare derivative
15 * works, and perform publicly and display publicly. Beginning five
16 * (5) years after the date permission to assert copyright is obtained
17 * from the U.S. Department of Energy, and subject to any subsequent
18 * five (5) year renewals, the U.S. Government is granted for itself
19 * and others acting on its behalf a paid-up, nonexclusive,
20 * irrevocable, worldwide license in the Software to reproduce,
21 * prepare derivative works, distribute copies to the public, perform
22 * publicly and display publicly, and to permit others to do so.
23 *
24 * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25 * (Lawrence Berkeley National Lab, Computational Research
26 * Division).
27 */
28#ifndef HSS_EXTRA_HPP
29#define HSS_EXTRA_HPP
30
31#include "dense/DenseMatrix.hpp"
32
33namespace strumpack {
34 namespace HSS {
35
45 enum class State : char
46 {UNTOUCHED='U',
52 COMPRESSED='C'
54 };
55
56#ifndef DOXYGEN_SHOULD_SKIP_THIS
57 template<typename T> std::pair<T,T>
58 operator+(const std::pair<T,T>& l, const std::pair<T,T>& r) {
59 return {l.first+r.first, l.second+r.second};
60 }
61#endif // DOXYGEN_SHOULD_SKIP_THIS
62
63
64#ifndef DOXYGEN_SHOULD_SKIP_THIS
65 template<typename scalar_t> class WorkCompressBase {
66 public:
67 WorkCompressBase() {}
68 virtual ~WorkCompressBase() {}
69 virtual void split(const std::pair<std::size_t,std::size_t>& dim) = 0;
70
71 std::vector<std::size_t> Ir, Ic, Jr, Jc;
72 std::pair<std::size_t,std::size_t> offset;
73 int lvl = 0;
74 scalar_t U_r_max, V_r_max;
75 };
76
77 template<typename scalar_t> class WorkCompress :
78 public WorkCompressBase<scalar_t> {
79 public:
80 std::vector<WorkCompress<scalar_t>> c;
81 // only needed in the new compression algorithm
82 DenseMatrix<scalar_t> Qr, Qc;
83 void split(const std::pair<std::size_t,std::size_t>& dim) {
84 if (c.empty()) {
85 c.resize(2);
86 c[0].offset = this->offset;
87 c[1].offset = this->offset + dim;
88 c[0].lvl = c[1].lvl = this->lvl + 1;
89 }
90 }
91 };
92
93
94 template<typename scalar_t,
95 typename real_t=typename RealType<scalar_t>::value_type>
96 class WorkCompressANN :
97 public WorkCompressBase<scalar_t> {
98 public:
99 std::vector<WorkCompressANN<scalar_t>> c;
100 DenseMatrix<scalar_t> S;
101 std::vector<std::pair<std::size_t,real_t>> ids_scores;
102 void split(const std::pair<std::size_t,std::size_t>& dim) {
103 if (c.empty()) {
104 c.resize(2);
105 c[0].offset = this->offset;
106 c[1].offset = this->offset + dim;
107 c[0].lvl = c[1].lvl = this->lvl + 1;
108 }
109 }
110 };
111
112 template<typename scalar_t> class WorkApply {
113 public:
114 std::pair<std::size_t,std::size_t> offset;
115 std::vector<WorkApply<scalar_t>> c;
116 DenseMatrix<scalar_t> tmp1, tmp2;
117 };
118
119 template<typename scalar_t> class WorkExtract {
120 public:
121 std::vector<WorkExtract<scalar_t>> c;
122 DenseMatrix<scalar_t> y, z;
123 std::vector<std::size_t> I, J, rl2g, cl2g, ycols, zcols;
124 void split_extraction_sets
125 (const std::pair<std::size_t,std::size_t>& dim) {
126 if (c.empty()) {
127 c.resize(2);
128 c[0].I.reserve(I.size());
129 c[1].I.reserve(I.size());
130 for (auto i : I)
131 if (i < dim.first) c[0].I.push_back(i);
132 else c[1].I.push_back(i - dim.first);
133 c[0].J.reserve(J.size());
134 c[1].J.reserve(J.size());
135 for (auto j : J)
136 if (j < dim.second) c[0].J.push_back(j);
137 else c[1].J.push_back(j - dim.second);
138 }
139 }
140 };
141
142 template<typename scalar_t> class WorkFactor {
143 public:
144 std::vector<WorkFactor<scalar_t>> c;
145 DenseMatrix<scalar_t> Dt; // (U.cols x U.cols) \tilde(D)
146 DenseMatrix<scalar_t> Vt1; // (U.cols x V.cols) bottom part of \tilde{V}
147 };
148#endif // DOXYGEN_SHOULD_SKIP_THIS
149
150
161 template<typename scalar_t> class HSSFactors {
162 public:
163
171 std::size_t memory() {
172 return sizeof(*this) + L_.memory() + Vt0_.memory()
173 + W1_.memory() + Q_.memory() + D_.memory()
174 + sizeof(int)*piv_.size();
175 }
176
183 std::size_t nonzeros() const {
184 return L_.nonzeros() + Vt0_.nonzeros() + W1_.nonzeros()
185 + Q_.nonzeros() + D_.nonzeros();
186 }
187
191 const DenseMatrix<scalar_t>& Vhat() const { return Vt0_; }
195 DenseMatrix<scalar_t>& Vhat() { return Vt0_; }
196
197 private:
198 DenseMatrix<scalar_t> L_; // (U.rows-U.cols x U.rows-U.cols),
199 // empty at the root
200 DenseMatrix<scalar_t> Vt0_; // (U.rows-U.cols x V.cols)
201 // at the root, _Vt0 stored Vhat
202 DenseMatrix<scalar_t> W1_; // (U.cols x U.rows) bottom part of W
203 // if (U.rows == U.cols)
204 // then W == I and is not stored!
205 DenseMatrix<scalar_t> Q_; // (U.rows x U.rows) Q from LQ(W0)
206 // if (U.rows == U.cols)
207 // then Q == I and is not stored!
208 DenseMatrix<scalar_t> D_; // (U.rows x U.rows) at the root holds LU(D)
209 // else empty
210 std::vector<int> piv_; // hold permutation from LU(D) at root
211 template<typename T> friend class HSSMatrix;
212 template<typename T> friend class HSSMatrixBase;
213 };
214
215#ifndef DOXYGEN_SHOULD_SKIP_THIS
216 template<typename scalar_t> class WorkSolve {
217 public:
218 std::vector<WorkSolve<scalar_t>> c;
219
220 // do we need all these?? x only used in bwd, y only used in fwd??
221 DenseMatrix<scalar_t> z, ft1, y, x;
222
223 // DO NOT STORE reduced_rhs here!!!
224 DenseMatrix<scalar_t> reduced_rhs;
225 std::pair<std::size_t,std::size_t> offset;
226 };
227#endif // DOXYGEN_SHOULD_SKIP_THIS
228
229
230#ifndef DOXYGEN_SHOULD_SKIP_THIS
231 template<typename scalar_t> class AFunctor {
232 using DenseM_t = DenseMatrix<scalar_t>;
233 public:
234 AFunctor(const DenseM_t& A) : _A(A) {}
235 const DenseM_t& _A;
236 void operator()
237 (DenseM_t& Rr, DenseM_t& Rc, DenseM_t& Sr, DenseM_t& Sc) {
238 gemm(Trans::N, Trans::N, scalar_t(1.), _A, Rr, scalar_t(0.), Sr);
239 gemm(Trans::C, Trans::N, scalar_t(1.), _A, Rc, scalar_t(0.), Sc);
240 }
241 void operator()(const std::vector<size_t>& I,
242 const std::vector<size_t>& J, DenseM_t& B) {
243 assert(I.size() == B.rows() && J.size() == B.cols());
244 for (std::size_t j=0; j<J.size(); j++)
245 for (std::size_t i=0; i<I.size(); i++) {
246 assert(I[i] >= 0 && I[i] < _A.rows() &&
247 J[j] >= 0 && J[j] < _A.cols());
248 B(i,j) = _A(I[i], J[j]);
249 }
250 }
251 };
252#endif // DOXYGEN_SHOULD_SKIP_THIS
253
254
255#ifndef DOXYGEN_SHOULD_SKIP_THIS
256 template<typename scalar_t> class WorkDense {
257 public:
258 std::pair<std::size_t,std::size_t> offset;
259 std::vector<WorkDense<scalar_t>> c;
260 DenseMatrix<scalar_t> tmpU, tmpV;
261 };
262#endif // DOXYGEN_SHOULD_SKIP_THIS
263
264 } // end namespace HSS
265} // end namespace strumpack
266
267#endif // HSS_EXTRA_HPP
Contains the DenseMatrix and DenseMatrixWrapper classes, simple wrappers around BLAS/LAPACK style den...
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
Contains data related to ULV factorization of an HSS matrix.
Definition: HSSExtra.hpp:161
DenseMatrix< scalar_t > & Vhat()
Definition: HSSExtra.hpp:195
std::size_t memory()
Definition: HSSExtra.hpp:171
std::size_t nonzeros() const
Definition: HSSExtra.hpp:183
const DenseMatrix< scalar_t > & Vhat() const
Definition: HSSExtra.hpp:191
Abstract base class for Hierarchically Semi-Separable (HSS) matrices.
Definition: HSSMatrixBase.hpp:83
Class to represent a sequential/threaded Hierarchically Semi-Separable matrix.
Definition: HSSMatrix.hpp:80
State
Definition: HSSExtra.hpp:46
Definition: StrumpackOptions.hpp:43
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)