strumpack::DistributedMatrix< scalar_t > Class Template Reference

2D block cyclically distributed matrix, as used by ScaLAPACK.

#include <DistributedMatrix.hpp>

Inheritance diagram for strumpack::DistributedMatrix< scalar_t >:

Public Member Functions

 DistributedMatrix ()
 
 DistributedMatrix (const BLACSGrid *g, int M, int N)
 
 DistributedMatrix (const BLACSGrid *g, int M, int N, const std::function< scalar_t(std::size_t, std::size_t)> &A)
 
 DistributedMatrix (const BLACSGrid *g, const DenseMatrix< scalar_t > &m)
 
 DistributedMatrix (const BLACSGrid *g, DenseMatrix< scalar_t > &&m)
 
 DistributedMatrix (const BLACSGrid *g, DenseMatrixWrapper< scalar_t > &&m)
 
 DistributedMatrix (const BLACSGrid *g, int M, int N, const DistributedMatrix< scalar_t > &m, int context_all)
 
 DistributedMatrix (const BLACSGrid *g, int M, int N, int MB, int NB)
 
 DistributedMatrix (const DistributedMatrix< scalar_t > &m)
 
 DistributedMatrix (DistributedMatrix< scalar_t > &&m)
 
virtual ~DistributedMatrix ()
 
DistributedMatrix< scalar_t > & operator= (const DistributedMatrix< scalar_t > &m)
 
DistributedMatrix< scalar_t > & operator= (DistributedMatrix< scalar_t > &&m)
 
const int * desc () const
 
bool active () const
 
const BLACSGrid * grid () const
 
const MPIComm & Comm () const
 
MPI_Comm comm () const
 
int ctxt () const
 
int ctxt_all () const
 
virtual int rows () const
 
virtual int cols () const
 
int lrows () const
 
int lcols () const
 
int ld () const
 
int MB () const
 
int NB () const
 
int rowblocks () const
 
int colblocks () const
 
virtual int I () const
 
virtual int J () const
 
virtual void lranges (int &rlo, int &rhi, int &clo, int &chi) const
 
const scalar_t * data () const
 
scalar_t * data ()
 
const scalar_t & operator() (int r, int c) const
 
scalar_t & operator() (int r, int c)
 
int prow () const
 
int pcol () const
 
int nprows () const
 
int npcols () const
 
int npactives () const
 
bool is_master () const
 
int rowl2g (int row) const
 
int coll2g (int col) const
 
int rowg2l (int row) const
 
int colg2l (int col) const
 
int rowg2p (int row) const
 
int colg2p (int col) const
 
int rank (int r, int c) const
 
bool is_local (int r, int c) const
 
const scalar_t & global (int r, int c) const
 
scalar_t & global (int r, int c)
 
void global (int r, int c, scalar_t v)
 
scalar_t all_global (int r, int c) const
 
void print () const
 
void print (std::string name, int precision=15) const
 
void print_to_file (std::string name, std::string filename, int width=8) const
 
void print_to_files (std::string name, int precision=16) const
 
void random ()
 
void random (random::RandomGeneratorBase< typename RealType< scalar_t >::value_type > &rgen)
 
void zero ()
 
void fill (scalar_t a)
 
void fill (const std::function< scalar_t(std::size_t, std::size_t)> &A)
 
void eye ()
 
void shift (scalar_t sigma)
 
scalar_t trace () const
 
void clear ()
 
virtual void resize (std::size_t m, std::size_t n)
 
virtual void hconcat (const DistributedMatrix< scalar_t > &b)
 
DistributedMatrix< scalar_t > transpose () const
 
void mult (Trans op, const DistributedMatrix< scalar_t > &X, DistributedMatrix< scalar_t > &Y) const
 
void laswp (const std::vector< int > &P, bool fwd)
 
DistributedMatrix< scalar_t > extract_rows (const std::vector< std::size_t > &Ir) const
 
DistributedMatrix< scalar_t > extract_cols (const std::vector< std::size_t > &Ic) const
 
DistributedMatrix< scalar_t > extract (const std::vector< std::size_t > &I, const std::vector< std::size_t > &J) const
 
DistributedMatrix< scalar_t > & add (const DistributedMatrix< scalar_t > &B)
 
DistributedMatrix< scalar_t > & scaled_add (scalar_t alpha, const DistributedMatrix< scalar_t > &B)
 
DistributedMatrix< scalar_t > & scale_and_add (scalar_t alpha, const DistributedMatrix< scalar_t > &B)
 
real_t norm () const
 
real_t normF () const
 
real_t norm1 () const
 
real_t normI () const
 
virtual std::size_t memory () const
 
virtual std::size_t total_memory () const
 
virtual std::size_t nonzeros () const
 
virtual std::size_t total_nonzeros () const
 
void scatter (const DenseMatrix< scalar_t > &a)
 
DenseMatrix< scalar_t > gather () const
 
DenseMatrix< scalar_t > all_gather () const
 
DenseMatrix< scalar_t > dense_and_clear ()
 
DenseMatrix< scalar_t > dense () const
 
DenseMatrixWrapper< scalar_t > dense_wrapper ()
 
std::vector< int > LU ()
 
int LU (std::vector< int > &)
 
DistributedMatrix< scalar_t > solve (const DistributedMatrix< scalar_t > &b, const std::vector< int > &piv) const
 
void LQ (DistributedMatrix< scalar_t > &L, DistributedMatrix< scalar_t > &Q) const
 
void orthogonalize (scalar_t &r_max, scalar_t &r_min)
 
void ID_column (DistributedMatrix< scalar_t > &X, std::vector< int > &piv, std::vector< std::size_t > &ind, real_t rel_tol, real_t abs_tol, int max_rank)
 
void ID_row (DistributedMatrix< scalar_t > &X, std::vector< int > &piv, std::vector< std::size_t > &ind, real_t rel_tol, real_t abs_tol, int max_rank, const BLACSGrid *grid_T)
 

Static Public Attributes

static const int default_MB = STRUMPACK_PBLAS_BLOCKSIZE
 
static const int default_NB = STRUMPACK_PBLAS_BLOCKSIZE
 

Detailed Description

template<typename scalar_t>
class strumpack::DistributedMatrix< scalar_t >

2D block cyclically distributed matrix, as used by ScaLAPACK.

This class represents a 2D block cyclically distributed matrix, as used by ScaLAPACK. To create a submatrix of a DistributedMatrix, use a DistributedMatrixWrapper.

Several routines in this class are collective on all processes in the process grid. The BLACSGrid passed when constructing the matrix should persist as long as the matrix is in use (in scope).

Template Parameters
scalar_t	Possible values for the scalar_t template type are: float, double, std::complex<float> and std::complex<double>.
See also
strumpack::BLACSGrid
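
A minimal usage sketch follows. The MPIComm and BLACSGrid constructors used here (wrapping an MPI_Comm and building a grid from it) are assumptions inferred from the Comm() and grid() accessors documented below; all DistributedMatrix calls are documented on this page.

  #include <mpi.h>
  #include <DistributedMatrix.hpp>

  int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    {
      strumpack::MPIComm c(MPI_COMM_WORLD);  // MPI_Comm wrapper (assumed ctor)
      strumpack::BLACSGrid g(c);             // 2D grid, made as square as possible
      int N = 1000;
      strumpack::DistributedMatrix<double> A(&g, N, N);  // values not set
      A.random();                            // fill with random values
      strumpack::DistributedMatrix<double> b(&g, N, 1);
      b.random();
      auto piv = A.LU();                     // in-place LU, returns pivot vector
      auto x = A.solve(b, piv);              // solve A x = b using the LU factors
    } // matrices go out of scope here; the grid must outlive them
    MPI_Finalize();
    return 0;
  }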

Constructor & Destructor Documentation

◆ DistributedMatrix() [1/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( )

Default constructor. The grid will not be initialized.

◆ DistributedMatrix() [2/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
int  M,
int  N 
)

Construct a distributed matrix of size MxN on grid g, using the default blocksizes strumpack::DistributedMatrix<scalar_t>::default_MB and strumpack::DistributedMatrix<scalar_t>::default_NB. The values of the matrix are not set.

Parameters
g	BLACS grid.
M	number of (global) rows
N	number of (global) columns

◆ DistributedMatrix() [3/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
int  M,
int  N,
const std::function< scalar_t(std::size_t, std::size_t)> &  A 
)

Construct a distributed matrix of size MxN on grid g, using the default blocksizes. The values of the matrix are initialized using the specified function A.

Parameters
g	BLACS grid.
M	number of (global) rows
N	number of (global) columns
A	routine used to initialize the matrix, by calling this->fill(A).
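
For example, a sketch of building a tridiagonal (1D Poisson-like) matrix with this constructor, assuming a grid g constructed as in the earlier example:

  strumpack::DistributedMatrix<double> A
    (&g, N, N, [](std::size_t i, std::size_t j) -> double {
       if (i == j) return 2.0;                 // diagonal
       if (i+1 == j || j+1 == i) return -1.0;  // off-diagonals
       return 0.0;
     });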

◆ DistributedMatrix() [4/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
const DenseMatrix< scalar_t > &  m 
)

Create a DistributedMatrix from a DenseMatrix. This only works on a BLACS grid with a single process. Values will be copied from the input DenseMatrix into the new DistributedMatrix.

Parameters
g	BLACS grid, g->nprows() == 1, g->npcols() == 1.
m	input matrix.

◆ DistributedMatrix() [5/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
DenseMatrix< scalar_t > &&  m 
)

Create a DistributedMatrix by moving from a DenseMatrix. This only works on a BLACS grid with a single process. The input DenseMatrix will be cleared.

Parameters
g	BLACS grid, g->nprows() == 1, g->npcols() == 1.
m	input matrix, cleared.

◆ DistributedMatrix() [6/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
DenseMatrixWrapper< scalar_t > &&  m 
)

Create a DistributedMatrix from a DenseMatrixWrapper. This only works on a BLACS grid with a single process. Since the wrapper does not own its storage, the values are copied from the viewed DenseMatrix into the new DistributedMatrix.

Parameters
g	BLACS grid, g->nprows() == 1, g->npcols() == 1.
m	input matrix.

◆ DistributedMatrix() [7/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
int  M,
int  N,
const DistributedMatrix< scalar_t > &  m,
int  context_all 
)

Construct a new DistributedMatrix as a copy of another. The sizes must be specified explicitly, since not all processes involved necessarily know the dimensions of the source matrix. The input matrix may be on a different BLACSGrid.

Parameters
g	BLACS grid of the matrix to be constructed
M	(global) number of rows of m, and of the new matrix
N	(global) number of columns of m, and of the new matrix
context_all	BLACS context containing all processes in g and in m.grid(). If g and m.grid() are the same, this can be g->ctxt_all(). Ideally this is a BLACS context with all processes arranged as a single row, since P_GEMR2D, the routine called from this constructor, is more efficient in that case.
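
A hedged sketch of redistributing a matrix A from its grid to another grid g2, assuming both grids were built from the same communicator, so that A.ctxt_all() (documented below) contains all involved processes:

  strumpack::DistributedMatrix<double>
    B(&g2, A.rows(), A.cols(), A, A.ctxt_all());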

◆ DistributedMatrix() [8/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const BLACSGrid *  g,
int  M,
int  N,
int  MB,
int  NB 
)

Construct a new MxN (global sizes) DistributedMatrix on processor grid g, using block sizes MB and NB. Matrix values will not be initialized.

Note that some operations, such as LU factorization, require MB == NB.
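
For instance, to use a 64x64 blocksize (MB == NB, so the matrix can later be factored with LU):

  strumpack::DistributedMatrix<double> A(&g, M, N, 64, 64);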

◆ DistributedMatrix() [9/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( const DistributedMatrix< scalar_t > &  m)

Copy constructor.

◆ DistributedMatrix() [10/10]

template<typename scalar_t >
strumpack::DistributedMatrix< scalar_t >::DistributedMatrix ( DistributedMatrix< scalar_t > &&  m)

Move constructor.

◆ ~DistributedMatrix()

template<typename scalar_t >
virtual strumpack::DistributedMatrix< scalar_t >::~DistributedMatrix ( )
virtual

Destructor.

Member Function Documentation

◆ active()

template<typename scalar_t >
bool strumpack::DistributedMatrix< scalar_t >::active ( ) const
inline

Check whether this rank is active in the grid on which this matrix is defined. Since the 2D grid built from the MPI communicator is made as square as possible, some processes might be idle, i.e., not active.

Returns
grid() && grid()->active()

◆ all_global()

template<typename scalar_t >
scalar_t strumpack::DistributedMatrix< scalar_t >::all_global ( int  r,
int  c 
) const

Return (on all ranks) the value at global position r,c. Ranks that are not active, i.e., not in the BLACSGrid, receive 0. Since the value at position r,c is stored on only one rank, this performs an (expensive) broadcast, so it should not be called in a loop.

Parameters
r	global row
c	global column
Returns
value at global position r,c (0 if !active())
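
A short sketch; every rank in the communicator gets the same value back:

  double v = A.all_global(7, 3);  // 0 on ranks with !A.active()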

◆ colblocks()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::colblocks ( ) const
inline

Return the number of column blocks stored on this rank.

Returns
number of local column blocks.

◆ colg2l()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::colg2l ( int  col) const
inline

Get the local column from the global column. This is 0-based. This requires grid() != NULL.

Parameters
col	global column, 0 <= col < cols()
Returns
local column corresponding to col, 0 <= local column < lcols().

◆ colg2p()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::colg2p ( int  col) const
inline

Get the process column from the global column. This is 0-based. This requires grid() != NULL.

Parameters
col	global column, 0 <= col < cols()
Returns
process column corresponding to col, 0 <= process column < grid()->npcols().

◆ coll2g()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::coll2g ( int  col) const
inline

Get the global column from the local column. This is 0-based. This requires grid() != NULL.

Parameters
col	local column, 0 <= col < lcols()
Returns
global column corresponding to col, 0 <= global column < cols().

◆ cols()

template<typename scalar_t >
virtual int strumpack::DistributedMatrix< scalar_t >::cols ( ) const
inlinevirtual

Get the number of global columns in the matrix

Returns
Global number of matrix columns

Reimplemented in strumpack::DistributedMatrixWrapper< scalar_t >.

◆ Comm()

template<typename scalar_t >
const MPIComm & strumpack::DistributedMatrix< scalar_t >::Comm ( ) const
inline

Get the MPIComm (MPI_Comm wrapper) communicator associated with the grid.

Returns
grid()->Comm()

◆ comm()

template<typename scalar_t >
MPI_Comm strumpack::DistributedMatrix< scalar_t >::comm ( ) const
inline

Get the MPI communicator associated with the processor grid.

Returns
grid()->comm()

◆ ctxt()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::ctxt ( ) const
inline

Get the BLACS context from the grid.

Returns
BLACS context if grid() != NULL, otherwise -1

◆ ctxt_all()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::ctxt_all ( ) const
inline

Get the global BLACS context from the grid. The global context is the context that includes all MPI ranks from the communicator used to construct the grid.

Returns
BLACS context if grid() != NULL, otherwise -1

◆ data() [1/2]

template<typename scalar_t >
scalar_t * strumpack::DistributedMatrix< scalar_t >::data ( )
inline

Return raw pointer to local storage.

Returns
pointer to first element of local storage.

◆ data() [2/2]

template<typename scalar_t >
const scalar_t * strumpack::DistributedMatrix< scalar_t >::data ( ) const
inline

Return raw pointer to local storage.

Returns
pointer to first element of local storage.

◆ desc()

template<typename scalar_t >
const int * strumpack::DistributedMatrix< scalar_t >::desc ( ) const
inline

Return the descriptor array.

Returns
the array descriptor for the distributed matrix.
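
A hedged sketch of passing the descriptor to a raw ScaLAPACK/PBLAS call. The pdgemm_ prototype below is the conventional Fortran binding; the exact symbol name and linkage can vary between ScaLAPACK builds:

  extern "C" void pdgemm_
    (const char*, const char*, const int*, const int*, const int*,
     const double*, const double*, const int*, const int*, const int*,
     const double*, const int*, const int*, const int*, const double*,
     double*, const int*, const int*, const int*);

  // C = A * B, with A m-x-k, B k-x-n, C m-x-n, all on the same grid
  void gemm(const strumpack::DistributedMatrix<double>& A,
            const strumpack::DistributedMatrix<double>& B,
            strumpack::DistributedMatrix<double>& C) {
    int m = C.rows(), n = C.cols(), k = A.cols(), one = 1;
    double alpha = 1., beta = 0.;
    pdgemm_("N", "N", &m, &n, &k, &alpha,
            A.data(), &one, &one, A.desc(),
            B.data(), &one, &one, B.desc(), &beta,
            C.data(), &one, &one, C.desc());
  }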

◆ global() [1/3]

template<typename scalar_t >
scalar_t & strumpack::DistributedMatrix< scalar_t >::global ( int  r,
int  c 
)
inline

Access global element r,c. This requires global element r,c to be stored locally on this rank, i.e., is_local(r,c); otherwise this results in undefined behavior or triggers an assertion.

Parameters
r	global row
c	global column
Returns
reference to global element r,c

◆ global() [2/3]

template<typename scalar_t >
const scalar_t & strumpack::DistributedMatrix< scalar_t >::global ( int  r,
int  c 
) const
inline

Access global element r,c. This requires global element r,c to be stored locally on this rank, i.e., is_local(r,c); otherwise this results in undefined behavior or triggers an assertion.

Parameters
r	global row
c	global column
Returns
const reference to global element r,c

◆ global() [3/3]

template<typename scalar_t >
void strumpack::DistributedMatrix< scalar_t >::global ( int  r,
int  c,
scalar_t  v 
)
inline

Set global element r,c to value v if global element r,c is stored locally on this rank, otherwise do nothing.

Parameters
r	global row
c	global column
v	value to set at global position r,c

◆ grid()

template<typename scalar_t >
const BLACSGrid * strumpack::DistributedMatrix< scalar_t >::grid ( ) const
inline

Get a pointer to the 2D processor grid used.

Returns
pointer to the 2D processor grid.

◆ I()

template<typename scalar_t >
virtual int strumpack::DistributedMatrix< scalar_t >::I ( ) const
inlinevirtual

Row index of the top left element. This is always 1 for a DistributedMatrix. For a DistributedMatrixWrapper, this can be different from 1, to denote the position of the submatrix in the original matrix.

Returns
Row index of top left element (1-based).

Reimplemented in strumpack::DistributedMatrixWrapper< scalar_t >.

◆ is_local()

template<typename scalar_t >
bool strumpack::DistributedMatrix< scalar_t >::is_local ( int  r,
int  c 
) const
inline

Check whether a global element r,c is local to this rank.

Parameters
r	global row
c	global column
Returns
rowg2p(r) == prow() && colg2p(c) == pcol()
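
A common guarded-update pattern (sketch, given global indices r and c):

  if (A.active() && A.is_local(r, c))
    A.global(r, c) += 1.0;  // safe: (r,c) is stored on this rank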

◆ is_master()

template<typename scalar_t >
bool strumpack::DistributedMatrix< scalar_t >::is_master ( ) const
inline

Check whether this process is the root/master, i.e., prow() == 0 && pcol() == 0. This requires grid() != NULL.

Returns
grid() && prow() == 0 && pcol() == 0

◆ J()

template<typename scalar_t >
virtual int strumpack::DistributedMatrix< scalar_t >::J ( ) const
inlinevirtual

Column index of the top left element. This is always 1 for a DistributedMatrix. For a DistributedMatrixWrapper, this can be different from 1, to denote the position of the submatrix in the original matrix.

Returns
Column index of top left element (1-based).

Reimplemented in strumpack::DistributedMatrixWrapper< scalar_t >.

◆ lcols()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::lcols ( ) const
inline

Get the number of local columns in the matrix.

Returns
number of local columns stored on this rank.

◆ ld()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::ld ( ) const
inline

Get the leading dimension for the local storage.

Returns
leading dimension of local storage.

◆ lranges()

template<typename scalar_t >
virtual void strumpack::DistributedMatrix< scalar_t >::lranges ( int &  rlo,
int &  rhi,
int &  clo,
int &  chi 
) const
virtual

Get the ranges of local rows and columns. For a DistributedMatrix, this is simply (0, lrows(), 0, lcols()). For a DistributedMatrixWrapper, this returns the local rows/columns of the underlying DistributedMatrix that correspond to the submatrix represented by the wrapper. The values are 0-based.

Parameters
rlo	output parameter, first local row
rhi	output parameter, one more than the last local row
clo	output parameter, first local column
chi	output parameter, one more than the last local column

Reimplemented in strumpack::DistributedMatrixWrapper< scalar_t >.
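
A sketch of traversing only the locally stored part; this also works when A is a DistributedMatrixWrapper viewing a submatrix:

  int rlo, rhi, clo, chi;
  A.lranges(rlo, rhi, clo, chi);
  for (int c = clo; c < chi; c++)    // local column range
    for (int r = rlo; r < rhi; r++)  // local row range
      A(r, c) *= 2.0;                // local indexing, see operator()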

◆ lrows()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::lrows ( ) const
inline

Get the number of local rows in the matrix.

Returns
number of local rows stored on this rank.

◆ MB()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::MB ( ) const
inline

Get the row blocksize.

Returns
row blocksize

◆ NB()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::NB ( ) const
inline

Get the column blocksize.

Returns
column blocksize

◆ npactives()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::npactives ( ) const
inline

Get the number of active processes in the process grid. This can be less than Comm().size(), and equals nprows()*npcols(). This requires grid() != NULL.

Returns
grid()->npactives()

◆ npcols()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::npcols ( ) const
inline

Get the number of process columns in the process grid. This requires grid() != NULL.

Returns
grid()->npcols()

◆ nprows()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::nprows ( ) const
inline

Get the number of process rows in the process grid. This requires grid() != NULL.

Returns
grid()->nprows()

◆ operator()() [1/2]

template<typename scalar_t >
scalar_t & strumpack::DistributedMatrix< scalar_t >::operator() ( int  r,
int  c 
)
inline

Access an element using local indexing. This can be used to read or write the value.

Parameters
rlocal row index
clocal column index
Returns
value at local row r, local column c
See also
rowg2l(), colg2l(), rowl2g(), coll2g()
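
A sketch of the standard local fill loop, mapping local indices back to global ones to evaluate an element function f (f is a hypothetical user-supplied function of the global indices):

  for (int lc = 0; lc < A.lcols(); lc++) {
    auto j = A.coll2g(lc);             // global column index
    for (int lr = 0; lr < A.lrows(); lr++)
      A(lr, lc) = f(A.rowl2g(lr), j);  // f(global row, global column)
  }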

◆ operator()() [2/2]

template<typename scalar_t >
const scalar_t & strumpack::DistributedMatrix< scalar_t >::operator() ( int  r,
int  c 
) const
inline

Access an element using local indexing. This const version is only for reading the value.

Parameters
rlocal row index
clocal column index
Returns
value at local row r, local column c
See also
rowg2l(), colg2l(), rowl2g(), coll2g()

◆ operator=() [1/2]

template<typename scalar_t >
DistributedMatrix< scalar_t > & strumpack::DistributedMatrix< scalar_t >::operator= ( const DistributedMatrix< scalar_t > &  m)

Copy assignment.

Parameters
mmatrix to copy from

◆ operator=() [2/2]

template<typename scalar_t >
DistributedMatrix< scalar_t > & strumpack::DistributedMatrix< scalar_t >::operator= ( DistributedMatrix< scalar_t > &&  m)

Move assignment.

Parameters
mmatrix to move from, will be cleared.

◆ pcol()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::pcol ( ) const
inline

Get the column of the process in the process grid. This requires grid() != NULL.

Returns
grid()->pcol()

◆ prow()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::prow ( ) const
inline

Get the row of the process in the process grid. This requires grid() != NULL.

Returns
grid()->prow()

◆ rank()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::rank ( int  r,
int  c 
) const
inline

Get the MPI rank in the communicator used to construct the grid corresponding to global row,column element. This assumes a column major ordering of the grid, see strumpack::BLACSGrid.

Parameters
rglobal row
cglobal column
Returns
rowg2p(r) + colg2p(c) * nprows()

◆ rowblocks()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::rowblocks ( ) const
inline

Return the number of row blocks stored on this rank.

Returns
number of local row blocks.

◆ rowg2l()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::rowg2l ( int  row) const
inline

Get the local row from the global row. This is 0-based. This requires grid() != NULL.

Parameters
row	global row, 0 <= row < rows()
Returns
local row corresponding to row, 0 <= local row < lrows().

◆ rowg2p()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::rowg2p ( int  row) const
inline

Get the process row from the global row. This is 0-based. This requires grid() != NULL.

Parameters
row	global row, 0 <= row < rows()
Returns
process row corresponding to row, 0 <= process row < grid()->nprows().

◆ rowl2g()

template<typename scalar_t >
int strumpack::DistributedMatrix< scalar_t >::rowl2g ( int  row) const
inline

Get the global row from the local row. This is 0-based. This requires grid() != NULL.

Parameters
row	local row, 0 <= row < lrows()
Returns
global row corresponding to row, 0 <= global row < rows().

◆ rows()

template<typename scalar_t >
virtual int strumpack::DistributedMatrix< scalar_t >::rows ( ) const
inlinevirtual

Get the number of global rows in the matrix

Returns
Global number of matrix rows

Reimplemented in strumpack::DistributedMatrixWrapper< scalar_t >.

Member Data Documentation

◆ default_MB

template<typename scalar_t >
const int strumpack::DistributedMatrix< scalar_t >::default_MB = STRUMPACK_PBLAS_BLOCKSIZE
static

Default row blocksize used for the 2D block cyclic distribution. This is set during CMake configuration.

The blocksize is set to a larger value when SLATE is used in order to achieve better GPU performance.

The default value is used when no blocksize is specified during matrix construction. Since the default value is a power of 2, some index mapping calculations between local/global can be optimized.

◆ default_NB

template<typename scalar_t >
const int strumpack::DistributedMatrix< scalar_t >::default_NB = STRUMPACK_PBLAS_BLOCKSIZE
static

Default column blocksize used for the 2D block cyclic distribution. This is set during CMake configuration.

See also
default_MB

The documentation for this class was generated from the following file: DistributedMatrix.hpp