// Shorthand for a vector of std::size_t. It is used further down for the
// Rt/Ct arguments and for the index tables rl2t_, cl2t_, rl2l_, cl2l_,
// rl2g_ and cl2g_.
128 using vec_t = std::vector<std::size_t>;
// NOTE(review): the opening of the following declaration is not present in
// this excerpt; only its last two parameters (read-only vec_t references
// named Rt and Ct) are visible.
135 const vec_t&
Rt,
const vec_t&
Ct);
137 std::size_t
rows()
const override {
return rows_; }
138 std::size_t
cols()
const override {
return cols_; }
// Overrides a base-class virtual; declared here, defined outside this excerpt.
142 std::size_t
rank()
const override;
// The next three const queries are declaration-only. What each one measures
// is suggested only by its name — TODO confirm against the definitions.
143 std::size_t total_memory()
const;
144 std::size_t total_nonzeros()
const;
145 std::size_t max_rank()
const;
// Returns a real_t; presumably the Frobenius norm, judging by the name —
// TODO confirm against the definition.
147 real_t normF()
const;
149 const MPIComm& Comm()
const {
return grid_->Comm(); }
153 bool active()
const {
return grid_->active(); }
// Declaration only: takes a single scalar_t value v. Presumably assigns v to
// every entry — TODO confirm against the definition.
155 void fill(scalar_t v);
// Declaration only: takes a value v plus two indices k and CP whose meaning
// is not visible in this excerpt.
156 void fill_col(scalar_t v, std::size_t k, std::size_t CP);
// factor overloads: both return std::vector<int>; the second additionally
// takes an adm_t argument. NOTE(review): laswp() below accepts a
// std::vector<int> named piv, so the result is presumably a pivot vector —
// confirm.
159 std::vector<int>
factor(
const Opts_t& opts);
160 std::vector<int>
factor(
const adm_t& adm,
const Opts_t& opts);
// NOTE(review): the type of the last parameter is cut off in this excerpt
// (only "<void(int, bool, std::size_t)>& blockcol" is visible).
// partial_factor_col declares a parameter with the same name as
// const std::function<void(int, bool, std::size_t)>&.
161 std::vector<int> factor_col(
const adm_t& adm,
const Opts_t& opts,
163 <
void(
int,
bool, std::size_t)>& blockcol);
// Declaration only: takes a std::vector<int> named piv and a flag fwd.
165 void laswp(
const std::vector<int>& piv,
bool fwd);
// NOTE(review): no matching #endif is visible in this excerpt, so the extent
// of the STRUMPACK_USE_GPU region cannot be determined from here.
167#if defined(STRUMPACK_USE_GPU)
// Static declaration taking four BLRMPI_t blocks (A11, A12, A21, A22) by
// mutable reference plus adm and opts; returns std::vector<int>.
168 static std::vector<int>
169 partial_factor_gpu(BLRMPI_t& A11, BLRMPI_t& A12,
170 BLRMPI_t& A21, BLRMPI_t& A22,
171 const adm_t& adm,
const Opts_t& opts);
// Same parameter list and return type as partial_factor_gpu.
174 static std::vector<int>
175 partial_factor(BLRMPI_t& A11, BLRMPI_t& A12,
176 BLRMPI_t& A21, BLRMPI_t& A22,
177 const adm_t& adm,
const Opts_t& opts);
// Static variant whose blocks are named F11, F12, F21, F22 and which also
// takes a blockcol callback of type
// std::function<void(int, bool, std::size_t)>.
179 static std::vector<int>
180 partial_factor_col(BLRMPI_t& F11, BLRMPI_t& F12, BLRMPI_t& F21,
181 BLRMPI_t& F22,
const adm_t& adm,
const Opts_t& opts,
182 const std::function<
void(
int,
bool, std::size_t)>& blockcol);
// Declaration only; takes the options object by const reference.
184 void compress(
const Opts_t& opts);
// NOTE(review): this declaration is not terminated in the excerpt — its
// parameter list ends with a trailing comma and the next line already starts
// another from_ScaLAPACK declaration.
187 BLRMPI_t from_ScaLAPACK(
const DistM_t& A,
const ProcessorGrid2D& g,
// Overload that additionally takes vec_t arguments Rt and Ct; returns a
// BLRMPI_t by value.
190 BLRMPI_t from_ScaLAPACK(
const DistM_t& A,
const ProcessorGrid2D& g,
191 const vec_t& Rt,
const vec_t& Ct);
// Two const to_ScaLAPACK overloads: one returns a DistM_t given a BLACSGrid
// pointer, the other takes a mutable DistM_t reference (presumably the
// output — confirm against the definition).
192 DistM_t to_ScaLAPACK(
const BLACSGrid* g)
const;
193 void to_ScaLAPACK(DistM_t& A)
const;
// Declaration only; takes a name string. Unlike the queries above it is not
// const-qualified.
195 void print(
const std::string& name);
197 std::size_t rowblocks()
const {
return brows_; }
198 std::size_t colblocks()
const {
return bcols_; }
199 std::size_t rowblockslocal()
const {
return lbrows_; }
200 std::size_t colblockslocal()
const {
return lbcols_; }
201 std::size_t tilerows(std::size_t i)
const {
return roff_[i+1] - roff_[i]; }
202 std::size_t tilecols(std::size_t j)
const {
return coff_[j+1] - coff_[j]; }
203 std::size_t tileroff(std::size_t i)
const { assert(i <= rowblocks());
return roff_[i]; }
204 std::size_t tilecoff(std::size_t j)
const { assert(j <= colblocks());
return coff_[j]; }
// Declaration-only const queries; their definitions are outside this excerpt.
205 std::size_t maxtilerows()
const;
206 std::size_t maxtilecols()
const;
// Index-mapping helpers, declaration only. NOTE(review): the names suggest
// r/c = row/column, g = global, l = local, p = process, t = tile — confirm
// against the definitions before relying on that reading.
208 int rg2p(std::size_t i)
const;
209 int cg2p(std::size_t j)
const;
210 std::size_t rl2g(std::size_t i)
const;
211 std::size_t cl2g(std::size_t j)
const;
212 std::size_t rg2t(std::size_t i)
const;
213 std::size_t cg2t(std::size_t j)
const;
215 std::size_t lrows()
const {
return lrows_; }
216 std::size_t lcols()
const {
return lcols_; }
// Declaration only. Both functions take a pair of int bounds c_min and c_max;
// whether the bounds are inclusive, and whether they are local or global
// indices, is not visible here — TODO confirm against the definitions.
226 void decompress_local_columns(
int c_min,
int c_max);
227 void remove_tiles_before_local_column(
int c_min,
int c_max);
// Dimensions returned by rows(), cols(), lrows() and lcols(); all start at 0.
237 std::size_t rows_ = 0, cols_ = 0, lrows_ = 0, lcols_ = 0;
// Tile counts returned by rowblocks(), colblocks(), rowblockslocal() and
// colblockslocal(); all start at 0.
238 std::size_t brows_ = 0, bcols_ = 0, lbrows_ = 0, lbcols_ = 0;
// Index tables. NOTE(review): presumably the lookup data behind rl2g(),
// cl2g() and related helpers — confirm against their definitions.
240 vec_t rl2t_, cl2t_, rl2l_, cl2l_, rl2g_, cl2g_;
// Owning storage for the tiles held here. ltile()/lblock() address it as
// i + j*rowblockslocal(), i.e. column by column over local tile indices.
241 std::vector<std::unique_ptr<BLRTile<scalar_t>>> blocks_;
244 std::size_t tilerg2l(std::size_t
i)
const {
245 assert(
int(
i % grid_->nprows()) == grid_->prow());
246 return i / grid_->nprows();
248 std::size_t tilecg2l(std::size_t
j)
const {
249 assert(
int(
j % grid_->npcols()) == grid_->pcol());
250 return j / grid_->npcols();
253 BLRTile<scalar_t>& tile(std::size_t i, std::size_t j) {
254 return ltile(tilerg2l(i), tilecg2l(j));
256 const BLRTile<scalar_t>& tile(std::size_t i, std::size_t j)
const {
257 return ltile(tilerg2l(i), tilecg2l(j));
259 DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j) {
260 return ltile_dense(tilerg2l(i), tilecg2l(j));
262 const DenseTile<scalar_t>& tile_dense(std::size_t i, std::size_t j)
const {
263 return ltile_dense(tilerg2l(i), tilecg2l(j));
266 BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j) {
267 assert(i < rowblockslocal() && j < colblockslocal());
268 return *blocks_[i+j*rowblockslocal()].get();
270 const BLRTile<scalar_t>& ltile(std::size_t i, std::size_t j)
const {
271 assert(i < rowblockslocal() && j < colblockslocal());
272 return *blocks_[i+j*rowblockslocal()].get();
275 DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j) {
276 assert(i < rowblockslocal() && j < colblockslocal());
277 assert(
dynamic_cast<DenseTile<scalar_t>*
>
278 (blocks_[i+j*rowblockslocal()].get()));
279 return *
static_cast<DenseTile<scalar_t>*
>
280 (blocks_[i+j*rowblockslocal()].get());
282 const DenseTile<scalar_t>& ltile_dense(std::size_t i, std::size_t j)
const {
283 assert(i < rowblockslocal() && j < colblockslocal());
284 assert(
dynamic_cast<const DenseTile<scalar_t>*
>
285 (blocks_[i+j*rowblockslocal()].get()));
286 return *
static_cast<const DenseTile<scalar_t>*
>
287 (blocks_[i+j*rowblockslocal()].get());
290 std::unique_ptr<BLRTile<scalar_t>>&
291 block(std::size_t i, std::size_t j) {
292 assert(i < rowblocks() && j < colblocks());
293 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
295 const std::unique_ptr<BLRTile<scalar_t>>&
296 block(std::size_t i, std::size_t j)
const {
297 assert(i < rowblocks() && j < colblocks());
298 return blocks_[tilerg2l(i)+tilecg2l(j)*rowblockslocal()];
301 std::unique_ptr<BLRTile<scalar_t>>&
302 lblock(std::size_t i, std::size_t j) {
303 assert(i < rowblockslocal() && j < colblockslocal());
304 return blocks_[i+j*rowblockslocal()];
306 const std::unique_ptr<BLRTile<scalar_t>>&
307 lblock(std::size_t i, std::size_t j)
const {
308 assert(i < rowblockslocal() && j < colblockslocal());
309 return blocks_[i+j*rowblockslocal()];
// Declaration only: takes a tile coordinate pair (i, j) and the options
// object.
312 void compress_tile(std::size_t i, std::size_t j,
const Opts_t& opts);
// NOTE(review): the return types of the next two const declarations are not
// present in this excerpt. Both take a tile coordinate pair (i, j).
315 bcast_dense_tile_along_col(std::size_t i, std::size_t j)
const;
317 bcast_dense_tile_along_row(std::size_t i, std::size_t j)
const;
// Declaration only, const. Takes one row index i and a column pair (j0, j1);
// returns a vector of owning tile pointers. Whether j1 is inclusive is not
// visible here.
319 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
320 bcast_row_of_tiles_along_cols(std::size_t i,
321 std::size_t j0, std::size_t j1)
const;
// Declaration only, const. Mirror of the above: a row pair (i0, i1) and one
// column index j.
322 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
323 bcast_col_of_tiles_along_rows(std::size_t i0, std::size_t i1,
324 std::size_t j)
const;
// NOTE(review): no matching #endif is visible in this excerpt, so the extent
// of the STRUMPACK_USE_GPU region cannot be determined from here.
326#if defined(STRUMPACK_USE_GPU)
// Declaration only, const. Same index arguments as
// bcast_row_of_tiles_along_cols, plus two scalar_t pointers (dptr, pinned)
// and a gpu_aware flag. Ownership and size of the two buffers are not
// visible here — TODO confirm.
327 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
328 bcast_row_of_tiles_along_cols_gpu(std::size_t i,
329 std::size_t j0, std::size_t j1,
330 scalar_t* dptr, scalar_t* pinned,
331 bool gpu_aware)
const;
// NOTE(review): only i0 and i1 are visible as index arguments here, and the
// parameter list jumps straight to dptr; the non-GPU counterpart
// bcast_col_of_tiles_along_rows also takes a column index j.
332 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
333 bcast_col_of_tiles_along_rows_gpu(std::size_t i0, std::size_t i1,
335 scalar_t* dptr, scalar_t* pinned,
336 bool gpu_aware)
const;
// All declarations in this group are const, declaration-only, and return a
// vector of owning tile pointers. What is gathered or sent, and between
// which processes, is defined outside this excerpt.

// Take a row pair (i0, i1) and a column pair (j0, j1).
339 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
340 gather_rows(std::size_t i0, std::size_t i1,
341 std::size_t j0, std::size_t j1)
const;
343 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
344 gather_cols(std::size_t i0, std::size_t i1,
345 std::size_t j0, std::size_t j1)
const;
// Take a source and a destination index plus a pair for the other dimension.
348 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
349 send_row_of_tiles(std::size_t src_row, std::size_t dest_row,
350 std::size_t j0, std::size_t j1)
const;
351 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
352 send_col_of_tiles(std::size_t src_col, std::size_t dest_col,
353 std::size_t i0, std::size_t i1)
const;
// Variants with an extra index k; its role is not visible here — TODO
// confirm against the definitions.
355 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
356 gather_row(std::size_t i0, std::size_t k,
357 std::size_t j0, std::size_t j1)
const;
359 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
360 gather_col(std::size_t i0, std::size_t i1,
361 std::size_t j0, std::size_t k)
const;
// Variants taking only (i1, j1).
363 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
364 gather_rows_A22(std::size_t i1, std::size_t j1)
const;
366 std::vector<std::unique_ptr<BLRTile<scalar_t>>>
367 gather_cols_A22(std::size_t i1, std::size_t j1)
const;
// Friend function templates operating on BLRMatrixMPI<T>.
// NOTE(review): the name and parameter list of this first friend declaration
// are not present in this excerpt.
369 template<
typename T>
friend void
// Friend gemv: takes a Trans flag, scalars alpha and beta, read-only
// operands a and x, and a mutable operand y.
372 template<
typename T>
friend void
373 gemv(
Trans ta,
T alpha,
const BLRMatrixMPI<T>& a,
374 const BLRMatrixMPI<T>& x,
T beta, BLRMatrixMPI<T>& y);
// NOTE(review): the name and leading parameters of this friend declaration
// are not present in this excerpt; only its last two parameters (read-only a,
// mutable b) are visible.
375 template<
typename T>
friend void
377 const BLRMatrixMPI<T>& a, BLRMatrixMPI<T>& b);
// Friend gemm: takes two Trans flags, scalars alpha and beta, read-only
// operands a and b, and a mutable operand c.
378 template<
typename T>
friend void
379 gemm(
Trans ta,
Trans tb,
T alpha,
const BLRMatrixMPI<T>& a,
380 const BLRMatrixMPI<T>& b,
T beta, BLRMatrixMPI<T>& c);
// NOTE(review): no matching #endif is visible in this excerpt, so the extent
// of the STRUMPACK_USE_GPU region cannot be determined from here.
382#if defined(STRUMPACK_USE_GPU)
// Declaration-only, non-const transfer helpers. Each takes a gpu::Stream and
// a scalar_t* named pinned; move_to_gpu also takes a scalar_t* named dptr.
// Buffer sizes and ownership are not visible here — TODO confirm.
383 void move_to_gpu(gpu::Stream& s, scalar_t* dptr, scalar_t* pinned);
384 void move_to_cpu(gpu::Stream& s, scalar_t* pinned);
// Row/column variants selected by an int index.
385 void move_row_to_cpu(
int i, gpu::Stream& s, scalar_t* pinned);
386 void move_col_to_cpu(
int j, gpu::Stream& s, scalar_t* pinned);
// Brings the factor overloads of structured::StructuredMatrix<scalar_t> into
// this scope, so they are not hidden by the factor overloads declared in
// this class.
390 using structured::StructuredMatrix<scalar_t>
::factor;
// Grants every specialization of the class template BLRExtendAdd<T, I>
// access to this class's non-public members.
393 template<
typename T,
typename I>
friend class BLRExtendAdd;