// Alias for the nested value_type of RealType<scalar_t>.
// NOTE(review): presumably the real-valued type underlying a possibly
// complex scalar_t -- confirm against the RealType definition.
74 using real_t =
typename RealType<scalar_t>::value_type;
76 std::function<void(
const std::vector<std::size_t>&,
// Constructor declaration (definition not visible in this section).
// Takes a row count m with a vector rowtiles, and a column count n with
// a vector coltiles.
// NOTE(review): rowtiles/coltiles presumably hold the individual tile
// sizes and are expected to sum to m and n respectively -- confirm
// against the definition.
83 BLRMatrix(std::size_t m,
const std::vector<std::size_t>& rowtiles,
84 std::size_t n,
const std::vector<std::size_t>& coltiles);
86 std::size_t
rows()
const override {
return m_; }
87 std::size_t
cols()
const override {
return n_; }
// Declaration only; overrides a virtual function of the base class and
// returns a std::size_t.
// NOTE(review): presumably a rank measure taken over the tiles of this
// matrix -- the exact definition is not visible here, confirm.
91 std::size_t
rank()
const override;
// Declaration only; const member returning a std::size_t.
// NOTE(review): presumably a count of zero entries -- the counting rule
// is not visible in this section, confirm against the definition.
97 std::size_t zeros()
const;
104 void compress(
const extract_t& Aelem,
const adm_t& admissible,
107 void compress_and_factor(
const DenseM_t& A,
const adm_t& admissible,
109 void compress_and_factor(
const extract_t& Aelem,
const adm_t& admissible,
// Declaration only; const member taking an output stream `of` and two
// std::size_t values roff and coff.
// NOTE(review): roff/coff are presumably row and column offsets applied
// to what is emitted on `of` -- confirm against the definition.
112 void draw(std::ostream& of, std::size_t roff, std::size_t coff)
const;
// Declaration only; const member taking a string `name`.
// NOTE(review): presumably prints this matrix labelled with `name`;
// destination and format are not visible here -- confirm.
114 void print(
const std::string& name)
const;
124 const std::vector<int>& piv()
const {
return piv_; }
// Declarations only; both are const and map a std::size_t to a
// std::size_t.
// NOTE(review): the names read as "row global to tile" and "column
// global to tile", i.e. presumably the index of the tile that contains
// global row i / global column j -- confirm against the definitions.
139 std::size_t rg2t(std::size_t i)
const;
140 std::size_t cg2t(std::size_t j)
const;
// Element access by (i, j), declarations only.
// The const overload returns the scalar by value; the non-const overload
// returns a reference, so the caller can assign through it.
// NOTE(review): i and j are presumably global row/column indices, and
// the non-const overload presumably needs the entry to be stored
// explicitly -- confirm against the definitions.
142 scalar_t operator()(std::size_t i, std::size_t j)
const;
143 scalar_t& operator()(std::size_t i, std::size_t j);
// Declaration only; const member taking two index vectors I and J and
// returning a DenseM_t by value.
// NOTE(review): presumably the dense submatrix at rows I and columns J
// -- confirm against the definition.
144 DenseM_t extract(
const std::vector<std::size_t>& I,
145 const std::vector<std::size_t>& J)
const;
// Declaration only; non-const member taking a column range as two ints.
// Note the bounds are int, unlike the std::size_t indices used by the
// other accessors in this class.
// NOTE(review): whether c_max is inclusive or exclusive is not visible
// here -- confirm against the definition.
148 void decompress_local_columns(
int c_min,
int c_max);
// Declaration only; non-const member taking a column range as two ints.
// NOTE(review): presumably releases tiles lying before the given local
// column range; the exact rule and the inclusiveness of c_max are not
// visible here -- confirm against the definition.
149 void remove_tiles_before_local_column(
int c_min,
int c_max);
151 std::size_t rowblocks()
const {
return nbrows_; }
152 std::size_t colblocks()
const {
return nbcols_; }
153 std::size_t tilerows(std::size_t i)
const {
154 assert(i < rowblocks());
155 return roff_[i+1] - roff_[i];
157 std::size_t tilecols(std::size_t j)
const {
158 assert(j < colblocks());
159 return coff_[j+1] - coff_[j];
161 std::size_t tileroff(std::size_t i)
const {
162 assert(i <= rowblocks());
165 std::size_t tilecoff(std::size_t j)
const {
166 assert(j <= colblocks());
// Declarations only; const members returning a std::size_t.
// NOTE(review): presumably the largest tile height / tile width over
// all block rows / block columns -- confirm against the definitions.
169 std::size_t maxtilerows()
const;
170 std::size_t maxtilecols()
const;
// Access to the tile at block position (i, j), declarations only.
// Mutable and const overloads, each returning a reference to a
// BLRTile<scalar_t>.
// NOTE(review): presumably dereferences the matching entry of blocks_,
// so the tile must already exist -- confirm against the definitions.
172 BLRTile<scalar_t>& tile(std::size_t i, std::size_t j);
173 const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j)
const;
// Declaration only; returns a mutable reference to the owning
// std::unique_ptr for block position (i, j), so the caller can reseat
// or reset the pointer.
// NOTE(review): presumably a slot of blocks_; the (i, j) -> slot mapping
// is not visible here -- confirm against the definition.
174 std::unique_ptr<BLRTile<scalar_t>>& block(std::size_t i, std::size_t j);
// Declaration only; const member taking a dense matrix A plus a block
// position (i, j) and returning a DenseMW_t by value.
// NOTE(review): presumably a non-owning wrapper around the part of A
// that corresponds to tile (i, j) -- confirm against the definition and
// the DenseMW_t type.
175 DenseMW_t tile(DenseM_t& A, std::size_t i, std::size_t j)
const;
// Access to the tile at block position (i, j) as a DenseTile<scalar_t>,
// declarations only (mutable and const overloads).
// NOTE(review): presumably requires tile (i, j) to actually be stored as
// a dense tile; what happens otherwise is not visible here -- confirm.
176 DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j);
177 const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j)
const;
// Declaration only; non-const member taking a block position (i, j) and
// an options object.
// NOTE(review): presumably compresses tile (i, j) according to opts --
// confirm against the definition.
179 void compress_tile(std::size_t i, std::size_t j,
const Opts_t& opts);
// Declaration only; non-const member taking a scalar v by value.
// NOTE(review): presumably sets the stored entries to v -- confirm.
180 void fill(scalar_t v);
// Declaration only; non-const member taking a scalar v and two
// std::size_t values k and CP.
// NOTE(review): the meaning of k and CP is not visible in this section
// (presumably a column/block-column selector and a count) -- confirm
// against the definition.
181 void fill_col(scalar_t v, std::size_t k, std::size_t CP);
184 construct_and_partial_factor(DenseM_t& A11, DenseM_t& A12,
185 DenseM_t& A21, DenseM_t& A22,
186 BLRM_t& B11, BLRM_t& B12, BLRM_t& B21,
187 const std::vector<std::size_t>& tiles1,
188 const std::vector<std::size_t>& tiles2,
189 const adm_t& admissible,
192#if defined(STRUMPACK_USE_GPU)
194 construct_and_partial_factor_gpu(DenseM_t& A11, DenseM_t& A12,
195 DenseM_t& A21, DenseM_t& A22,
196 BLRM_t& B11, BLRM_t& B12, BLRM_t& B21,
197 const std::vector<std::size_t>& tiles1,
198 const std::vector<std::size_t>& tiles2,
199 const adm_t& admissible,
200 VectorPool<scalar_t>& workspace,
205 construct_and_partial_factor(BLRM_t& B11, BLRM_t& B12,
206 BLRM_t& B21, BLRM_t& B22,
207 const std::vector<std::size_t>& tiles1,
208 const std::vector<std::size_t>& tiles2,
209 const adm_t& admissible,
213 construct_and_partial_factor_col(BLRM_t& B11, BLRM_t& B12,
214 BLRM_t& B21, BLRM_t& B22,
215 const std::vector<std::size_t>& tiles1,
216 const std::vector<std::size_t>& tiles2,
217 const adm_t& admissible,
219 const std::function<
void
220 (
int,
bool, std::size_t)>& blockcol);
223 construct_and_partial_factor(std::size_t n1, std::size_t n2,
224 const extract_t& A11,
const extract_t& A12,
225 const extract_t& A21,
const extract_t& A22,
226 BLRM_t& B11, BLRM_t& B12,
227 BLRM_t& B21, BLRM_t& B22,
228 const std::vector<std::size_t>& tiles1,
229 const std::vector<std::size_t>& tiles2,
230 const adm_t& admissible,
231 const BLROptions<scalar_t>& opts);
234 trsmLNU_gemm(
const BLRM_t& F1,
const BLRM_t& F2,
235 DenseM_t& B1, DenseM_t& B2,
int task_depth);
238 gemm_trsmUNN(
const BLRM_t& F1,
const BLRM_t& F2,
239 DenseM_t& B1, DenseM_t& B2,
int task_depth);
// Data members.
// m_, n_: row/column counts returned by rows()/cols().
// nbrows_, nbcols_: block-row/block-column counts returned by
// rowblocks()/colblocks().
242 std::size_t m_ = 0, n_ = 0, nbrows_ = 0, nbcols_ = 0;
// roff_, coff_: offset arrays; tilerows(i)/tilecols(j) compute tile
// sizes as differences of consecutive entries.
// NOTE(review): the role of cl2l_ and rl2l_ is not visible in this
// section -- confirm where they are used.
243 std::vector<std::size_t> roff_, coff_, cl2l_, rl2l_;
// Owning pointers to the tiles.
// NOTE(review): the (i, j) -> index layout is not visible here.
244 std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
// Pivot vector exposed read-only through piv().
245 std::vector<int> piv_;
// Declarations only; two overloads for block position (i, j). One takes
// a dense matrix A, the other an element-extraction callback Aelem.
// NOTE(review): presumably both create a dense tile at (i, j) from the
// respective source -- confirm against the definitions.
247 void create_dense_tile(std::size_t i, std::size_t j, DenseM_t& A);
248 void create_dense_tile(std::size_t i, std::size_t j,
249 const extract_t& Aelem);
// Declaration only; takes a block position (i, j) and an
// element-extraction callback Aelem.
// NOTE(review): how this "left-looking" variant differs from
// create_dense_tile is not visible here -- confirm.
250 void create_dense_tile_left_looking(std::size_t i, std::size_t j,
251 const extract_t& Aelem);
252 void create_dense_tile_left_looking(std::size_t i, std::size_t j,
254 const extract_t& Aelem,
// Declarations only; two overloads for block position (i, j), each with
// an options object. One takes a dense matrix A, the other an
// element-extraction callback (also named A).
// NOTE(review): presumably both create a low-rank tile at (i, j) using
// opts -- confirm against the definitions.
257 void create_LR_tile(std::size_t i, std::size_t j,
258 DenseM_t& A,
const Opts_t& opts);
259 void create_LR_tile(std::size_t i, std::size_t j,
260 const extract_t& A,
const Opts_t& opts);
262#if defined(STRUMPACK_USE_GPU)
// Declaration only; compiled only when STRUMPACK_USE_GPU is defined.
// Takes a dense matrix A and a raw scalar_t pointer `work`.
// NOTE(review): `work` is presumably caller-provided scratch memory; its
// required size and memory space are not visible here -- confirm.
263 void create_from_column_major_gpu(DenseM_t& A, scalar_t* work);
266 void create_LR_tile_left_looking(std::size_t i, std::size_t j,
267 const extract_t& Aelem,
270 void create_LR_tile_left_looking(std::size_t i, std::size_t j,
272 const extract_t& Aelem,
273 const BLRM_t& B21,
const BLRM_t& B12,
// Declaration only; takes a block position (i, j), a bound kmax, a
// dense matrix A11, an options object and a raw int pointer B.
// NOTE(review): presumably an update step for tile (i, j) of the (1,1)
// block covering contributions up to kmax; the role of B is not visible
// here -- confirm against the definition.
275 void LUAR_B11(std::size_t i, std::size_t j, std::size_t kmax,
276 DenseM_t& A11,
const Opts_t& opts,
int* B);
// Declaration only; takes a block position (i, j), a bound kmax, a BLR
// matrix B11, a dense matrix A12, an options object and a raw int
// pointer B.
// NOTE(review): presumably the LUAR_B11 counterpart for the (1,2) block;
// the role of B is not visible here -- confirm.
277 void LUAR_B12(std::size_t i, std::size_t j, std::size_t kmax,
278 BLRM_t& B11, DenseM_t& A12,
279 const Opts_t& opts,
int* B);
// Declaration only; takes a block position (i, j), a bound kmax, a BLR
// matrix B11, a dense matrix A21, an options object and a raw int
// pointer B.
// NOTE(review): presumably the LUAR_B11 counterpart for the (2,1) block;
// the role of B is not visible here -- confirm.
280 void LUAR_B21(std::size_t i, std::size_t j, std::size_t kmax,
281 BLRM_t& B11, DenseM_t& A21,
282 const Opts_t& opts,
int* B);
// Friend declaration: every instantiation of the free function template
// draw(const BLRMatrix<T>&, const std::string&) gets access to the
// non-public members of this class.
284 template<
typename T>
friend
285 void draw(
const BLRMatrix<T>& H,
const std::string& name);
// Friend declaration: every specialization of the class template
// BLRExtendAdd<T, I> gets access to the non-public members of this
// class.
286 template<
typename T,
typename I>
friend class BLRExtendAdd;
// Bring the mult and solve members of the base class
// structured::StructuredMatrix<scalar_t> into this class's scope.
// NOTE(review): presumably so the base-class overloads are not hidden by
// same-named members declared in this class -- confirm.
289 using structured::StructuredMatrix<scalar_t>
::mult;
290 using structured::StructuredMatrix<scalar_t>
::solve;