34 #ifndef DENSE_MATRIX_HPP 
   35 #define DENSE_MATRIX_HPP 
   41 #include "misc/RandomWrapper.hpp" 
   42 #include "BLASLAPACKWrapper.hpp" 
   57   inline Trans c2T(
char op) {
 
   63       std::cerr << 
"ERROR: char " << op << 
" not recognized," 
   64                 << 
" should be one of n/N, t/T or c/C" << std::endl;
 
  139     using real_t = 
typename RealType<scalar_t>::value_type;
 
  142     scalar_t* data_ = 
nullptr;
 
  143     std::size_t rows_ = 0;
 
  144     std::size_t cols_ = 0;
 
  173                 const std::function<scalar_t(std::size_t,std::size_t)>& A);
 
  188                 const scalar_t* D, std::size_t 
ld);
 
  206                 std::size_t i, std::size_t j);
 
  227     inline std::size_t 
rows()
 const { 
return rows_; }
 
  230     inline std::size_t 
cols()
 const { 
return cols_; }
 
  236     inline std::size_t 
ld()
 const { 
return ld_; }
 
  241     inline const scalar_t* 
data()
 const { 
return data_; }
 
  246     inline scalar_t* 
data() { 
return data_; }
 
  252     inline scalar_t* 
end() { 
return data_ + ld_ * cols_; }
 
  258     inline const scalar_t* 
end()
 const {
 
  259       if (rows_ == 0 || cols_ == 0) 
return data_;
 
  260       return data_ + ld_ * cols_;
 
  271     inline const scalar_t& 
operator()(std::size_t i, std::size_t j)
 const 
  272     { assert(i<=
rows() && j<=
cols()); 
return data_[i+ld_*j]; }
 
  282     inline const scalar_t* 
ptr(std::size_t i, std::size_t j)
 const 
  283     { assert(i<=
rows() && j<=
cols()); 
return data_+i+ld_*j; }
 
  294     { assert(i<=
rows() && j<=
cols()); 
return data_[i+ld_*j]; }
 
  304     inline scalar_t* 
ptr(std::size_t i, std::size_t j)
 
  305     { assert(i<=
rows() && j<=
cols()); 
return data_+i+ld_*j; }
 
  328     void print(std::string name, 
bool all=
false, 
int width=8) 
const;
 
  340                        std::string filename, 
int width=8) 
const;
 
  369     void fill(
const std::function<scalar_t(std::size_t,std::size_t)>& A);
 
  393     void resize(std::size_t m, std::size_t n);
 
  417               std::size_t i=0, std::size_t j=0);
 
  427     void copy(
const scalar_t* B, std::size_t ldb);
 
  447     void laswp(
const std::vector<int>& P, 
bool fwd);
 
  473     void lapmr(
const std::vector<int>& P, 
bool fwd);
 
  487     void lapmt(
const std::vector<int>& P, 
bool fwd);
 
  551             const std::vector<std::size_t>& J) 
const;
 
  624     scale_rows_real(
const std::vector<real_t>& D, 
int depth=0);
 
  647     (
const std::vector<scalar_t>& D, 
int depth=0);
 
  676       return sizeof(scalar_t) * 
rows() * 
cols();
 
  701     int LU(std::vector<int>& piv, 
int depth=0);
 
  716     std::vector<int> 
LU(
int depth=0);
 
  738     std::vector<int> 
LDLt(
int depth=0);
 
  769      const std::vector<int>& piv, 
int depth=0) 
const;
 
  885      std::vector<std::size_t>& ind, real_t rel_tol,
 
  886      real_t abs_tol, 
int max_rank, 
int depth);
 
  907      std::vector<std::size_t>& ind, real_t rel_tol, real_t abs_tol,
 
  908      int max_rank, 
int depth) 
const;
 
  930      real_t rel_tol, real_t abs_tol, 
int max_rank, 
int depth) 
const;
 
  969     void write(
const std::string& fname) 
const;
 
  982      std::vector<std::size_t>& ind, real_t rel_tol,
 
  983      real_t abs_tol, 
int max_rank, 
int depth);
 
  987     template<
typename T> 
friend std::ofstream&
 
  989     template<
typename T> 
friend std::ifstream&
 
 1014   template<
typename scalar_t>
 
 1033                        scalar_t* D, std::size_t 
ld) {
 
 1034       this->data_ = D; this->rows_ = m; this->cols_ = n;
 
 1035       this->ld_ = std::max(std::size_t(1), 
ld);
 
 1052                        std::size_t i, std::size_t j)
 
 1054       assert(i+m <= D.
rows());
 
 1055       assert(j+n <= D.
cols());
 
 1072       this->rows_ = 0; this->cols_ = 0;
 
 1073       this->ld_ = 1; this->data_ = 
nullptr;
 
 1085     std::size_t 
memory()
 const override { 
return 0; }
 
 1143       this->data_ = D.data(); this->rows_ = D.rows();
 
 1144       this->cols_ = D.cols(); this->ld_ = D.ld(); 
return *
this; }
 
 1156       assert(a.
rows()==this->rows() && a.
cols()==this->cols());
 
 1157       for (std::size_t j=0; j<this->
cols(); j++)
 
 1158         for (std::size_t i=0; i<this->
rows(); i++)
 
 1159           this->
operator()(i, j) = a(i, j);
 
 1177   template<
typename scalar_t>
 
 1178   std::unique_ptr<const DenseMatrixWrapper<scalar_t>>
 
 1180   (std::size_t m, std::size_t n, 
const scalar_t* D, std::size_t ld) {
 
 1181     return std::unique_ptr<const DenseMatrixWrapper<scalar_t>>
 
 1203   template<
typename scalar_t>
 
 1204   std::unique_ptr<const DenseMatrixWrapper<scalar_t>>
 
 1207    std::size_t i, std::size_t j) {
 
 1208     return std::unique_ptr<const DenseMatrixWrapper<scalar_t>>
 
 1230   template<
typename scalar_from_t, 
typename scalar_to_t> 
void 
 1233        std::size_t ib, std::size_t jb) {
 
 1234     for (std::size_t j=0; j<n; j++)
 
 1235       for (std::size_t i=0; i<m; i++)
 
 1236         b(ib+i, jb+j) = 
static_cast<scalar_to_t
>(a(ia+i, ja+j));
 
 1249   template<
typename scalar_from_t, 
typename scalar_to_t> 
void 
 1251        std::size_t ib=0, std::size_t jb=0) {
 
 1263   template<
typename scalar_t> 
void 
 1265     for (std::size_t j=0; j<a.
cols(); j++)
 
 1266       for (std::size_t i=0; i<a.
rows(); i++)
 
 1267         b[i+j*ldb] = a(i, j);
 
 1279   template<
typename scalar_t> DenseMatrix<scalar_t>
 
 1296   template<
typename scalar_t> DenseMatrix<scalar_t>
 
 1312   template<
typename scalar_t> DenseMatrix<scalar_t>
 
 1313   eye(std::size_t m, std::size_t n) {
 
 1338   template<
typename scalar_t> 
void 
 1343   template<
typename scalar_t> 
void 
 1345        const scalar_t* b, 
int ldb, scalar_t beta,
 
 1348   template<
typename scalar_t> 
void 
 1351        scalar_t* c, 
int ldc, 
int depth=0);
 
 1362   template<
typename scalar_t> 
void 
 1380   template<
typename scalar_t> 
void 
 1393   template<
typename scalar_t> 
void 
 1405   template<
typename scalar_t> 
void 
 1418   template<
typename scalar_t> 
void 
 1420        const scalar_t* x, 
int incx, scalar_t beta,
 
 1431   template<
typename scalar_t> 
void 
 1434        scalar_t* y, 
int incy, 
int depth=0);
 
 1444   template<
typename scalar_t> 
void 
 1446        const scalar_t* x, 
int incx, scalar_t beta,
 
 1447        scalar_t* y, 
int incy, 
int depth=0);
 
 1451   template<
typename scalar_t> 
long long int 
 1453     return (is_complex<scalar_t>() ? 4:1) *
 
 1454       blas::getrf_flops(a.
rows(), a.
cols());
 
 1458   template<
typename scalar_t> 
long long int 
 1460     return (is_complex<scalar_t>() ? 4:1) *
 
 1461       blas::getrs_flops(b.
rows(), b.
cols());
 
 1465   template<
typename scalar_t> 
long long int 
 1467     auto minrc = std::min(a.
rows(), a.
cols());
 
 1468     return (is_complex<scalar_t>() ? 4:1) *
 
 1469       (blas::gelqf_flops(a.
rows(), a.
cols()) +
 
 1470        blas::xxglq_flops(a.
cols(), a.
cols(), minrc));
 
 1474   template<
typename scalar_t> 
long long int 
 1476     return (is_complex<scalar_t>() ? 4:1) *
 
 1477       (blas::geqp3_flops(a.
cols(), a.
rows()) +
 
 1482   template<
typename scalar_t> 
long long int 
 1485     return (is_complex<scalar_t>() ? 4:1) *
 
 1490   template<
typename scalar_t> 
long long int 
 1494     return (is_complex<scalar_t>() ? 4:1) *
 
 1502   template<
typename scalar_t> 
long long int 
 1506     return (is_complex<scalar_t>() ? 4:1) *
 
 1512   template<
typename scalar_t> 
long long int 
 1514     auto minrc = std::min(a.
rows(), a.
cols());
 
 1515     return (is_complex<scalar_t>() ? 4:1) *
 
 1516       (blas::geqrf_flops(a.
rows(), minrc) +
 
 1517        blas::xxgqr_flops(a.
rows(), minrc, minrc));
 
 1529   template<
typename scalar_t,
typename cast_t>
 
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use ...
Definition: DenseMatrix.hpp:1015
DenseMatrix< scalar_t > & operator=(const DenseMatrix< scalar_t > &a) override
Definition: DenseMatrix.hpp:1155
virtual ~DenseMatrixWrapper()
Definition: DenseMatrix.hpp:1063
DenseMatrixWrapper(const DenseMatrixWrapper< scalar_t > &)=default
DenseMatrixWrapper()
Definition: DenseMatrix.hpp:1020
DenseMatrixWrapper(const DenseMatrix< scalar_t > &)=delete
DenseMatrixWrapper(std::size_t m, std::size_t n, scalar_t *D, std::size_t ld)
Definition: DenseMatrix.hpp:1032
void clear() override
Definition: DenseMatrix.hpp:1071
std::size_t nonzeros() const override
Definition: DenseMatrix.hpp:1096
DenseMatrixWrapper< scalar_t > & operator=(DenseMatrixWrapper< scalar_t > &&D)
Definition: DenseMatrix.hpp:1142
DenseMatrixWrapper(std::size_t m, std::size_t n, DenseMatrix< scalar_t > &D, std::size_t i, std::size_t j)
Definition: DenseMatrix.hpp:1051
std::size_t memory() const override
Definition: DenseMatrix.hpp:1085
DenseMatrixWrapper(DenseMatrixWrapper< scalar_t > &&)=default
DenseMatrixWrapper(DenseMatrix< scalar_t > &&)=delete
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK rou...
Definition: DenseMatrix.hpp:138
void lapmt(const std::vector< int > &P, bool fwd)
DenseMatrix(DenseMatrix< scalar_t > &&D)
void copy(const scalar_t *B, std::size_t ldb)
void solve_LDLt_in_place(DenseMatrix< scalar_t > &b, const std::vector< int > &piv, int depth=0) const
std::size_t cols() const
Definition: DenseMatrix.hpp:230
DenseMatrix< scalar_t > & add(const DenseMatrix< scalar_t > &B, int depth=0)
void laswp(const int *P, bool fwd)
scalar_t * data()
Definition: DenseMatrix.hpp:246
void fill(const std::function< scalar_t(std::size_t, std::size_t)> &A)
virtual std::size_t nonzeros() const
Definition: DenseMatrix.hpp:683
const scalar_t * data() const
Definition: DenseMatrix.hpp:241
void random(random::RandomGeneratorBase< typename RealType< scalar_t >::value_type > &rgen)
void copy(const DenseMatrix< scalar_t > &B, std::size_t i=0, std::size_t j=0)
const scalar_t & operator()(std::size_t i, std::size_t j) const
Definition: DenseMatrix.hpp:271
const scalar_t * ptr(std::size_t i, std::size_t j) const
Definition: DenseMatrix.hpp:282
DenseMatrix< scalar_t > solve(const DenseMatrix< scalar_t > &b, const std::vector< int > &piv, int depth=0) const
DenseMatrix< scalar_t > transpose() const
DenseMatrix< scalar_t > & scatter_rows_add(const std::vector< std::size_t > &I, const DenseMatrix< scalar_t > &B, int depth)
DenseMatrix(std::size_t m, std::size_t n, const scalar_t *D, std::size_t ld)
DenseMatrix< scalar_t > & sub(const DenseMatrix< scalar_t > &B, int depth=0)
std::vector< int > LDLt(int depth=0)
std::size_t rows() const
Definition: DenseMatrix.hpp:227
std::size_t ld() const
Definition: DenseMatrix.hpp:236
scalar_t & operator()(std::size_t i, std::size_t j)
Definition: DenseMatrix.hpp:293
DenseMatrix(std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &D, std::size_t i, std::size_t j)
DenseMatrix< scalar_t > & scaled_add(scalar_t alpha, const DenseMatrix< scalar_t > &B, int depth=0)
void print(std::string name, bool all=false, int width=8) const
void laswp(const std::vector< int > &P, bool fwd)
void solve_LU_in_place(DenseMatrix< scalar_t > &b, const std::vector< int > &piv, int depth=0) const
void ID_column(DenseMatrix< scalar_t > &X, std::vector< int > &piv, std::vector< std::size_t > &ind, real_t rel_tol, real_t abs_tol, int max_rank, int depth)
virtual DenseMatrix< scalar_t > & operator=(const DenseMatrix< scalar_t > &D)
void lapmr(const std::vector< int > &P, bool fwd)
DenseMatrix(const DenseMatrix< scalar_t > &D)
void LQ(DenseMatrix< scalar_t > &L, DenseMatrix< scalar_t > &Q, int depth) const
void print_to_file(std::string name, std::string filename, int width=8) const
void extract_cols(const std::vector< std::size_t > &I, DenseMatrix< scalar_t > &B) const
DenseMatrix< scalar_t > extract_rows(const std::vector< std::size_t > &I) const
std::vector< int > LU(int depth=0)
DenseMatrix< scalar_t > & scale_and_add(scalar_t alpha, const DenseMatrix< scalar_t > &B, int depth=0)
void hconcat(const DenseMatrix< scalar_t > &b)
virtual std::size_t memory() const
Definition: DenseMatrix.hpp:675
DenseMatrix< scalar_t > & scale(scalar_t alpha, int depth=0)
DenseMatrix< scalar_t > & div_rows(const std::vector< scalar_t > &D, int depth=0)
void solve_LU_in_place(DenseMatrix< scalar_t > &b, const int *piv, int depth=0) const
int syev(Jobz job, UpLo ul, std::vector< scalar_t > &lambda)
DenseMatrix< scalar_t > extract(const std::vector< std::size_t > &I, const std::vector< std::size_t > &J) const
DenseMatrix< scalar_t > & scale_rows(const scalar_t *D, int depth=0)
scalar_t * ptr(std::size_t i, std::size_t j)
Definition: DenseMatrix.hpp:304
void transpose(DenseMatrix< scalar_t > &X) const
const scalar_t * end() const
Definition: DenseMatrix.hpp:258
DenseMatrix< scalar_t > extract_cols(const std::vector< std::size_t > &I) const
DenseMatrix(std::size_t m, std::size_t n)
void extract_rows(const std::vector< std::size_t > &I, DenseMatrix< scalar_t > &B) const
void orthogonalize(scalar_t &r_max, scalar_t &r_min, int depth)
int Cholesky(int depth=0)
int LU(std::vector< int > &piv, int depth=0)
void write(const std::string &fname) const
void print() const
Definition: DenseMatrix.hpp:313
void resize(std::size_t m, std::size_t n)
static DenseMatrix< scalar_t > read(const std::string &fname)
void ID_row(DenseMatrix< scalar_t > &X, std::vector< int > &piv, std::vector< std::size_t > &ind, real_t rel_tol, real_t abs_tol, int max_rank, int depth) const
virtual DenseMatrix< scalar_t > & operator=(DenseMatrix< scalar_t > &&D)
DenseMatrix(std::size_t m, std::size_t n, const std::function< scalar_t(std::size_t, std::size_t)> &A)
scalar_t * end()
Definition: DenseMatrix.hpp:252
DenseMatrix< scalar_t > & scale_rows(const std::vector< scalar_t > &D, int depth=0)
void low_rank(DenseMatrix< scalar_t > &U, DenseMatrix< scalar_t > &V, real_t rel_tol, real_t abs_tol, int max_rank, int depth) const
void shift(scalar_t sigma)
std::vector< scalar_t > singular_values() const
2D block cyclicly distributed matrix, as used by ScaLAPACK.
Definition: DistributedMatrix.hpp:84
class to wrap the C++11 random number generator/distribution
Definition: RandomWrapper.hpp:105
Definition: StrumpackOptions.hpp:43
long long int LU_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1452
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
Definition: DenseMatrix.hpp:1491
UpLo
Definition: DenseMatrix.hpp:83
DenseMatrix< scalar_t > vconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1280
void trmm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
void trsv(UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
DenseMatrix< scalar_t > eye(std::size_t m, std::size_t n)
Definition: DenseMatrix.hpp:1313
void gemv(Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
Trans
Definition: DenseMatrix.hpp:51
void trsm(Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
long long int orthogonalize_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1513
long long int trsm_flops(Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1483
Side
Definition: DenseMatrix.hpp:74
void copy(std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
Definition: DenseMatrix.hpp:1231
void gemm(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
CSRMatrix< cast_t, integer_t > cast_matrix(const CSRMatrix< scalar_t, integer_t > &mat)
long long int gemm_flops(Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, scalar_t beta, const DenseMatrix< scalar_t > &c)
Definition: DenseMatrix.hpp:1503
long long int ID_row_flops(const DenseMatrix< scalar_t > &a, int rank)
Definition: DenseMatrix.hpp:1475
long long int LQ_flops(const DenseMatrix< scalar_t > &a)
Definition: DenseMatrix.hpp:1466
long long int solve_flops(const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1459
DenseMatrix< scalar_t > hconcat(const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
Definition: DenseMatrix.hpp:1297
Diag
Definition: DenseMatrix.hpp:92
Jobz
Definition: DenseMatrix.hpp:101
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > ConstDenseMatrixWrapperPtr(std::size_t m, std::size_t n, const scalar_t *D, std::size_t ld)
Definition: DenseMatrix.hpp:1180