strumpack Namespace Reference

Namespaces

namespace  BLR
 
namespace  HODLR
 
namespace  HSS
 
namespace  kernel
 
namespace  random
 
namespace  structured
 

Classes

class  BLACSGrid
 This is a small wrapper class around a BLACS grid and a BLACS context. More...
 
class  CompressedSparseMatrix
 Abstract base class for compressed sparse matrix storage. More...
 
class  CSRGraph
 
class  CSRMatrix
 Class for storing a compressed sparse row matrix (single node). More...
 
class  CSRMatrixMPI
 Block-row distributed compressed sparse row storage. More...
 
class  DenseMatrix
 This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines. More...
 
class  DenseMatrixWrapper
 Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines. However, objects of the DenseMatrixWrapper class do not allocate, own or free any data. More...
 
class  DistributedMatrix
2D block-cyclically distributed matrix, as used by ScaLAPACK. More...
 
class  DistributedMatrixWrapper
 
class  EliminationTree
 
class  EliminationTreeMPIDist
 
class  Equilibration
 
class  ExtendAdd
 
class  FrontalMatrixBLRMPI
 
class  MatchingData
 
class  MatrixReordering
 
class  MatrixReorderingMPI
 
class  MPIComm
 Wrapper class around an MPI_Comm object. More...
 
class  MPIRequest
 Wrapper around an MPI_Request object. More...
 
class  SparseSolver
 SparseSolver is the main sequential or multithreaded sparse solver class. More...
 
class  SparseSolverBase
 SparseSolverBase is the virtual base for both the sequential/multithreaded and distributed sparse solver classes. More...
 
class  SparseSolverMixedPrecision
SparseSolverMixedPrecision uses a lower precision (float) for the factorization/preconditioner and a higher precision (double) for the outer iterative solver. See also LAPACK's dsgesv. More...
 
class  SparseSolverMPIDist
 This is the fully distributed solver. More...
 
class  SPMVBuffers
 
class  SPOptions
 Options for the sparse solver. More...
 

Typedefs

template<typename scalar_t , typename integer_t >
using StrumpackSparseSolverBase = SparseSolverBase< scalar_t, integer_t >
 
template<typename scalar_t , typename integer_t >
using StrumpackSparseSolver = SparseSolver< scalar_t, integer_t >
 
template<typename factor_t , typename refine_t , typename integer_t >
using StrumpackSparseSolverMixedPrecision = SparseSolverMixedPrecision< factor_t, refine_t, integer_t >
 
template<typename scalar_t , typename integer_t >
using StrumpackSparseSolverMPIDist = SparseSolverMPIDist< scalar_t, integer_t >
 

Enumerations

enum class  ProportionalMapping { FLOPS , FACTOR_MEMORY , PEAK_MEMORY }
 
enum class  ReorderingStrategy {
  NATURAL , METIS , PARMETIS , SCOTCH ,
  PTSCOTCH , RCM , GEOMETRIC , AMD ,
  MMD , AND , MLF , SPECTRAL
}
 
enum class  CompressionType {
  NONE , HSS , BLR , HODLR ,
  BLR_HODLR , ZFP_BLR_HODLR , LOSSLESS , LOSSY
}
 
enum class  MatchingJob {
  NONE , MAX_CARDINALITY , MAX_SMALLEST_DIAGONAL , MAX_SMALLEST_DIAGONAL_2 ,
  MAX_DIAGONAL_SUM , MAX_DIAGONAL_PRODUCT_SCALING , COMBBLAS
}
 
enum class  EquilibrationType : char { NONE ='N' , ROW ='R' , COLUMN ='C' , BOTH ='B' }
 
enum class  GramSchmidtType { CLASSICAL , MODIFIED }
 
enum class  KrylovSolver {
  AUTO , DIRECT , REFINE , PREC_GMRES ,
  GMRES , PREC_BICGSTAB , BICGSTAB
}
 
enum class  ReturnCode {
  SUCCESS , MATRIX_NOT_SET , REORDERING_ERROR , ZERO_PIVOT ,
  NO_CONVERGENCE , INACCURATE_INERTIA
}
 Enumeration for the possible return codes. More...
 
enum class  Trans : char { N ='N' , C ='C' , T ='T' }
 
enum class  Side : char { L ='L' , R ='R' }
 
enum class  UpLo : char { U ='U' , L ='L' }
 
enum class  Diag : char { U ='U' , N ='N' }
 
enum class  Jobz : char { N ='N' , V ='V' }
 
enum class  ClusteringAlgorithm {
  NATURAL , TWO_MEANS , KD_TREE , PCA ,
  COBBLE
}
 

Functions

std::string get_name (ReorderingStrategy method)
 
bool is_parallel (ReorderingStrategy method)
 
std::string get_name (CompressionType comp)
 
MatchingJob get_matching (int job)
 
int get_matching (MatchingJob job)
 
std::string get_description (MatchingJob job)
 
template<typename real_t >
real_t default_rel_tol ()
 
template<typename real_t >
real_t default_abs_tol ()
 
template<>
float default_rel_tol ()
 
template<>
float default_abs_tol ()
 
int default_gpu_streams ()
 
std::ostream & operator<< (std::ostream &os, ReturnCode &e)
 
template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrix< cast_t, integer_t > cast_matrix (const CSRMatrix< scalar_t, integer_t > &mat)
 
template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrixMPI< cast_t, integer_t > cast_matrix (const CSRMatrixMPI< scalar_t, integer_t > &mat)
 
Trans c2T (char op)
 
template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > ConstDenseMatrixWrapperPtr (std::size_t m, std::size_t n, const scalar_t *D, std::size_t ld)
 
template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > ConstDenseMatrixWrapperPtr (std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &D, std::size_t i, std::size_t j)
 
template<typename scalar_from_t , typename scalar_to_t >
void copy (std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
 
template<typename scalar_from_t , typename scalar_to_t >
void copy (const DenseMatrix< scalar_from_t > &a, DenseMatrix< scalar_to_t > &b, std::size_t ib=0, std::size_t jb=0)
 
template<typename scalar_t >
void copy (const DenseMatrix< scalar_t > &a, scalar_t *b, std::size_t ldb)
 
template<typename scalar_t >
DenseMatrix< scalar_t > vconcat (const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
DenseMatrix< scalar_t > hconcat (const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
DenseMatrix< scalar_t > eye (std::size_t m, std::size_t n)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *b, int ldb, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, scalar_t *c, int ldc, int depth=0)
 
template<typename scalar_t >
void trmm (Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
 
template<typename scalar_t >
void trsm (Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
 
template<typename scalar_t >
void trsv (UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *x, int incx, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, scalar_t *y, int incy, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *x, int incx, scalar_t beta, scalar_t *y, int incy, int depth=0)
 
template<typename scalar_t >
long long int LU_flops (const DenseMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int solve_flops (const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int LQ_flops (const DenseMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int ID_row_flops (const DenseMatrix< scalar_t > &a, int rank)
 
template<typename scalar_t >
long long int trsm_flops (Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
 
template<typename scalar_t >
long long int gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, scalar_t beta, const DenseMatrix< scalar_t > &c)
 
template<typename scalar_t >
long long int orthogonalize_flops (const DenseMatrix< scalar_t > &a)
 
template<typename scalar_t , typename cast_t >
DenseMatrix< cast_t > cast_matrix (const DenseMatrix< scalar_t > &mat)
 
template<typename scalar_t >
void copy (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_t > &b, int dest, int context_all)
 
template<typename scalar_t >
void copy (std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &a, int src, DistributedMatrix< scalar_t > &b, std::size_t ib, std::size_t jb, int context_all)
 
template<typename scalar_t >
void copy (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &a, std::size_t ia, std::size_t ja, DistributedMatrix< scalar_t > &b, std::size_t ib, std::size_t jb, int context_all)
 
template<typename scalar_t >
long long int LU_flops (const DistributedMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int solve_flops (const DistributedMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int LQ_flops (const DistributedMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int ID_row_flops (const DistributedMatrix< scalar_t > &a, int rank)
 
template<typename scalar_t >
long long int trsm_flops (Side s, scalar_t alpha, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, scalar_t beta)
 
template<typename scalar_t >
long long int gemv_flops (Trans ta, const DistributedMatrix< scalar_t > &a, scalar_t alpha, scalar_t beta)
 
template<typename scalar_t >
long long int orthogonalize_flops (const DistributedMatrix< scalar_t > &a)
 
template<typename scalar_t >
std::unique_ptr< const DistributedMatrixWrapper< scalar_t > > ConstDistributedMatrixWrapperPtr (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &D, std::size_t i, std::size_t j)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix< scalar_t > &A, const DistributedMatrix< scalar_t > &B, scalar_t beta, DistributedMatrix< scalar_t > &C)
 
template<typename scalar_t >
void trsm (Side s, UpLo u, Trans ta, Diag d, scalar_t alpha, const DistributedMatrix< scalar_t > &A, DistributedMatrix< scalar_t > &B)
 
template<typename scalar_t >
void trsv (UpLo ul, Trans ta, Diag d, const DistributedMatrix< scalar_t > &A, DistributedMatrix< scalar_t > &B)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DistributedMatrix< scalar_t > &A, const DistributedMatrix< scalar_t > &X, scalar_t beta, DistributedMatrix< scalar_t > &Y)
 
template<typename scalar_t >
DistributedMatrix< scalar_t > vconcat (int cols, int arows, int brows, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, const BLACSGrid *gnew, int cxt_all)
 
template<typename scalar_t >
void subgrid_copy_to_buffers (const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, int p0, int npr, int npc, std::vector< std::vector< scalar_t > > &sbuf)
 
template<typename scalar_t >
void subproc_copy_to_buffers (const DenseMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, int p0, int npr, int npc, std::vector< std::vector< scalar_t > > &sbuf)
 
template<typename scalar_t >
void subgrid_add_from_buffers (const BLACSGrid *subg, int master, DistributedMatrix< scalar_t > &b, std::vector< scalar_t * > &pbuf)
 
std::ostream & operator<< (std::ostream &os, const BLACSGrid *g)
 
template<typename T >
MPI_Datatype mpi_type ()
 
template<>
MPI_Datatype mpi_type< char > ()
 
template<>
MPI_Datatype mpi_type< bool > ()
 
template<>
MPI_Datatype mpi_type< int > ()
 
template<>
MPI_Datatype mpi_type< long > ()
 
template<>
MPI_Datatype mpi_type< unsigned long > ()
 
template<>
MPI_Datatype mpi_type< long long int > ()
 
template<>
MPI_Datatype mpi_type< float > ()
 
template<>
MPI_Datatype mpi_type< double > ()
 
template<>
MPI_Datatype mpi_type< std::complex< float > > ()
 
template<>
MPI_Datatype mpi_type< std::complex< double > > ()
 
template<>
MPI_Datatype mpi_type< std::pair< int, int > > ()
 
template<>
MPI_Datatype mpi_type< std::pair< long int, long int > > ()
 
template<>
MPI_Datatype mpi_type< std::pair< long long int, long long int > > ()
 
void wait_all (std::vector< MPIRequest > &reqs)
 
void wait_all (std::vector< MPI_Request > &reqs)
 
int mpi_rank (MPI_Comm c=MPI_COMM_WORLD)
 
int mpi_nprocs (MPI_Comm c=MPI_COMM_WORLD)
 
std::string get_name (ClusteringAlgorithm c)
 
ClusteringAlgorithm get_clustering_algorithm (const std::string &c)
 
template<typename T >
void pca_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, int *perm)
 
template<typename T >
structured::ClusterTree recursive_pca (DenseMatrix< T > &p, std::size_t cluster_size, int *perm)
 
template<typename T >
void cobble_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, int *perm)
 
template<typename T >
structured::ClusterTree recursive_cobble (DenseMatrix< T > &p, std::size_t cluster_size, int *perm)
 
template<typename T >
structured::ClusterTree recursive_2_means (DenseMatrix< T > &p, std::size_t cluster_size, int *perm, std::mt19937 &generator)
 
template<typename T >
void kd_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, std::size_t cluster_size, int *perm)
 
template<typename T >
structured::ClusterTree recursive_kd (DenseMatrix< T > &p, std::size_t cluster_size, int *perm)
 
template<typename scalar_t >
structured::ClusterTree binary_tree_clustering (ClusteringAlgorithm algo, DenseMatrix< scalar_t > &p, std::vector< int > &perm, std::size_t cluster_size)
 
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t Euclidean_distance_squared (std::size_t d, const scalar_t *x, const scalar_t *y)
 
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t Euclidean_distance (std::size_t d, const scalar_t *x, const scalar_t *y)
 
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t norm1_distance (std::size_t d, const scalar_t *x, const scalar_t *y)
 

Detailed Description

All of STRUMPACK is contained in the strumpack namespace.
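
As a quick orientation, the sketch below shows how the pieces documented on this page fit together for a basic solve with the sequential/multithreaded SparseSolver. This is only a minimal sketch: the function name solve_sketch, the CSR arrays, right-hand side and solution vector are placeholders, and the option value is illustrative.

    #include "StrumpackSparseSolver.hpp"

    // Minimal sketch: solve A x = b for a CSR matrix given by
    // (n, row_ptr, col_ind, values); b and x are length-n arrays.
    void solve_sketch(int n, const int* row_ptr, const int* col_ind,
                      const double* values, const double* b, double* x) {
      strumpack::SparseSolver<double,int> sp;
      sp.options().set_rel_tol(1e-10);   // optional: tighten the relative tolerance
      sp.set_csr_matrix(n, row_ptr, col_ind, values);
      sp.reorder();                      // fill-reducing reordering (ReorderingStrategy)
      sp.factor();                       // multifrontal factorization
      sp.solve(b, x);                    // solve, refined by the outer KrylovSolver
    }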

Enumeration Type Documentation

◆ ClusteringAlgorithm

enum class strumpack::ClusteringAlgorithm
strong

Enumeration of clustering codes to order input data and create a binary cluster tree.

Enumerator
NATURAL 

No reordering, split evenly

TWO_MEANS 

Defines a binary tree using k(=2)-means

KD_TREE 

Simple kd-tree clustering

PCA 

Cluster based on the principal component

COBBLE 

Cobble partitioning

◆ CompressionType

enum class strumpack::CompressionType
strong

Enumeration of rank-structured data formats, which can be used for compression within the sparse solver.

Enumerator
NONE 

No compression, purely direct solver

HSS 

HSS compression of frontal matrices

BLR 

Block low-rank compression of fronts

HODLR 

Hierarchically Off-diagonal Low-Rank compression of frontal matrices

BLR_HODLR 

Block low-rank compression of medium fronts and Hierarchically Off-diagonal Low-Rank compression of large fronts

ZFP_BLR_HODLR 

ZFP compression for small fronts, Block low-rank compression of medium fronts and Hierarchically Off-diagonal Low-Rank compression of large fronts

LOSSLESS 

Lossless compression

LOSSY 

Lossy compression
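
A minimal sketch of selecting one of these formats through the solver options, assuming an SPOptions setter named set_compression (the exact setter name may differ between STRUMPACK versions):

    // Compress the frontal matrices with block low-rank (BLR) approximations,
    // turning the direct factorization into a rank-structured preconditioner.
    void enable_blr(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_compression(strumpack::CompressionType::BLR);
    }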

◆ Diag

enum class strumpack::Diag : char
strong

Whether or not the matrix has a unit diagonal.

Enumerator
U 

Unit diagonal

N 

Non-unit diagonal

◆ GramSchmidtType

enum class strumpack::GramSchmidtType
strong

Type of Gram-Schmidt orthogonalization used in GMRes.

Enumerator
CLASSICAL 

Classical Gram-Schmidt is faster, more scalable.

MODIFIED 

Modified Gram-Schmidt is slower, but stable.

◆ Jobz

enum class strumpack::Jobz : char
strong

Job for eigenvalue/vector computations

Enumerator
N 

Compute eigenvalues only

V 

Compute eigenvalues and eigenvectors

◆ KrylovSolver

enum class strumpack::KrylovSolver
strong

Type of outer iterative (Krylov) solver.

Enumerator
AUTO 

Use iterative refinement if no compression is used, otherwise use GMRes.

DIRECT 

No outer iterative solver, just a single application of the multifrontal solver.

REFINE 

Iterative refinement.

PREC_GMRES 

Preconditioned GMRes. The preconditioner is the (approx) multifrontal solver.

GMRES 

Un-preconditioned GMRes (mainly for testing).

PREC_BICGSTAB 

Preconditioned BiCGStab. The preconditioner is the (approx) multifrontal solver.

BICGSTAB 

Un-preconditioned BiCGStab (mainly for testing).
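
A minimal sketch of selecting the outer Krylov solver, assuming SPOptions setters named set_Krylov_solver and set_maxit:

    // Wrap the (possibly compressed) multifrontal factorization in GMRes and
    // cap the number of outer iterations.
    void use_prec_gmres(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_Krylov_solver(strumpack::KrylovSolver::PREC_GMRES);
      sp.options().set_maxit(200);
    }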

◆ MatchingJob

enum class strumpack::MatchingJob
strong

Enumeration of possible matching algorithms, used for permutation of the sparse matrix to improve stability.

Enumerator
NONE 

Don't do anything

MAX_CARDINALITY 

Maximum cardinality

MAX_SMALLEST_DIAGONAL 

Maximum smallest diagonal value

MAX_SMALLEST_DIAGONAL_2 

Same as MAX_SMALLEST_DIAGONAL, but using a different algorithm

MAX_DIAGONAL_SUM 

Maximum sum of diagonal values

MAX_DIAGONAL_PRODUCT_SCALING 

Maximum product of diagonal values and row and column scaling

COMBBLAS 

Use AWPM from CombBLAS
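
A minimal sketch of enabling a matching job before the factorization, assuming an SPOptions setter named set_matching:

    // Maximize the product of the diagonal entries and apply the corresponding
    // row/column scaling, often a robust choice for unsymmetric problems.
    void enable_matching(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_matching(
        strumpack::MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING);
    }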

◆ ProportionalMapping

enum class strumpack::ProportionalMapping
strong

Enumeration of strategies for proportional mapping of the multifrontal tree.

Enumerator
FLOPS 

Balance flops, optimize runtime

FACTOR_MEMORY 

Balance final memory for LU factors

PEAK_MEMORY 

Balance peak memory usage during factorization

◆ ReorderingStrategy

enum class strumpack::ReorderingStrategy
strong

Enumeration of possible sparse fill-reducing orderings.

Enumerator
NATURAL 

Do not reorder the system

METIS 

Use Metis nested-dissection reordering

PARMETIS 

Use ParMetis nested-dissection reordering

SCOTCH 

Use Scotch nested-dissection reordering

PTSCOTCH 

Use PT-Scotch nested-dissection reordering

RCM 

Use RCM reordering

GEOMETRIC 

A simple geometric nested dissection code that only works for regular meshes. (see Sp::reorder)

AMD 

Approximate minimum degree

MMD 

Multiple minimum degree

AND 

Nested dissection

MLF 

Minimum local fill

SPECTRAL 

Spectral nested dissection
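
A minimal sketch of choosing the fill-reducing ordering, assuming an SPOptions setter named set_reordering_method; get_name and is_parallel (declared above) can be used for logging and for checking whether the ordering code runs in parallel:

    #include <string>

    void use_metis(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_reordering_method(strumpack::ReorderingStrategy::METIS);
      // e.g. for logging:
      std::string name = strumpack::get_name(strumpack::ReorderingStrategy::METIS);
      bool parallel_ordering = strumpack::is_parallel(strumpack::ReorderingStrategy::METIS);
    }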

◆ ReturnCode

enum class strumpack::ReturnCode
strong

Enumeration for the possible return codes.

Enumerator
SUCCESS 

Operation completed successfully.

MATRIX_NOT_SET 

The input matrix was not set.

REORDERING_ERROR 

The matrix reordering failed.

ZERO_PIVOT 

A zero pivot was encountered.

NO_CONVERGENCE 

The iterative solver did not converge.

INACCURATE_INERTIA 

Inertia could not be computed.
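
A minimal sketch of checking these codes: factor() and solve() on the solver classes return a ReturnCode, and the operator<< declared above can be used to print it.

    #include <iostream>

    bool try_factor(strumpack::SparseSolver<double,int>& sp) {
      strumpack::ReturnCode err = sp.factor();
      if (err != strumpack::ReturnCode::SUCCESS)
        std::cerr << "factorization failed: " << err << std::endl;
      return err == strumpack::ReturnCode::SUCCESS;
    }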

◆ Side

enum class strumpack::Side : char
strong

Which side to apply the operation on, as used by several BLAS routines.

Enumerator
L 

Left side

R 

Right side

◆ Trans

enum class strumpack::Trans : char
strong

Operation to perform on the matrix, as used by several BLAS routines.

Enumerator
N 

No transpose

C 

Complex conjugate

T 

Transpose

◆ UpLo

enum class strumpack::UpLo : char
strong

Which triangular part of the matrix to consider, as used by several BLAS routines.

Enumerator
U 

Upper triangle

L 

Lower triangle

Function Documentation

◆ binary_tree_clustering()

template<typename scalar_t >
structured::ClusterTree strumpack::binary_tree_clustering ( ClusteringAlgorithm  algo,
DenseMatrix< scalar_t > &  p,
std::vector< int > &  perm,
std::size_t  cluster_size 
)

Reorder the input data and define a (binary) cluster tree.

Parameters
algo: ClusteringAlgorithm to use
p: Input data set. This is a d x n matrix (column major): d is the number of features, n is the number of data points, so the data is stored point after point. This will be reordered according to perm.
perm: The permutation. The permutation uses 1-based indexing, so it can be used with LAPACK permutation routines (such as DenseMatrix::lapmt). This will be resized to the correct size, i.e., n == p.cols().
cluster_size: Stop partitioning when this cluster_size is reached. This corresponds to the HSS/HODLR leaf size.
Returns
A structured::ClusterTree defined by the (recursive) clustering.
See also
strumpack::DenseMatrix::lapmt, get_clustering_algorithm, get_name(ClusteringAlgorithm)
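
A minimal sketch of the call (the function name cluster_points and the leaf size of 64 are illustrative):

    #include <vector>

    // Cluster n points in d dimensions, stored one point per column of p,
    // into a binary tree with leaves of at most 64 points.
    strumpack::structured::ClusterTree
    cluster_points(strumpack::DenseMatrix<double>& p) {
      std::vector<int> perm;   // resized by the call, 1-based permutation
      auto tree = strumpack::binary_tree_clustering(
        strumpack::ClusteringAlgorithm::KD_TREE, p, perm, 64);
      // p has been reordered in place; perm can be applied to associated labels.
      return tree;
    }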

◆ cast_matrix() [1/3]

template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrix< cast_t, integer_t > strumpack::cast_matrix ( const CSRMatrix< scalar_t, integer_t > &  mat)

Creates a copy of a matrix templated on cast_t and integer_t. Original matrix is unmodified.

Template Parameters
scalar_t: value type of the original matrix
integer_t: integer type of the original matrix
cast_t: value type of the returned matrix
Parameters
mat: const reference to the input CSRMatrix<scalar_t,integer_t>.

◆ cast_matrix() [2/3]

template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrixMPI< cast_t, integer_t > strumpack::cast_matrix ( const CSRMatrixMPI< scalar_t, integer_t > &  mat)

Creates a copy of a matrix templated on cast_t and integer_t. Original matrix is unmodified.

Template Parameters
scalar_t: value type of the original matrix
integer_t: integer type of the original matrix
cast_t: value type of the returned matrix
Parameters
mat: const reference to the input CSRMatrixMPI<scalar_t,integer_t>.

◆ cast_matrix() [3/3]

template<typename scalar_t , typename cast_t >
DenseMatrix< cast_t > strumpack::cast_matrix ( const DenseMatrix< scalar_t > &  mat)

Creates a copy of a matrix templated on cast_t. Original matrix is unmodified.

Template Parameters
scalar_t: value type of the original matrix
cast_t: value type of the returned matrix
Parameters
mat: const reference to the input DenseMatrix<scalar_t>.
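
For example, a double precision matrix can be cast to single precision, e.g. to feed the float factorization of the mixed-precision solvers; a minimal sketch:

    strumpack::DenseMatrix<float>
    to_single(const strumpack::DenseMatrix<double>& A) {
      return strumpack::cast_matrix<double,float>(A);   // A itself is unchanged
    }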

◆ ConstDenseMatrixWrapperPtr() [1/2]

template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > strumpack::ConstDenseMatrixWrapperPtr ( std::size_t  m,
std::size_t  n,
const DenseMatrix< scalar_t > &  D,
std::size_t  i,
std::size_t  j 
)

Create a DenseMatrixWrapper for a const dense matrix. TODO: we need to find a better way to handle this. Should have i+m <= D.rows() and j+n <= D.cols().

Parameters
m: number of rows of the submatrix (the created wrapper), the view in the matrix D
n: number of columns of the submatrix
D: const reference to a dense matrix. This will not be modified or freed.
i: row offset in the matrix D, denoting the top left corner of the submatrix to be created
j: column offset in the matrix D, denoting the top left corner of the submatrix to be created
Returns
unique_ptr with a const DenseMatrixWrapper

◆ ConstDenseMatrixWrapperPtr() [2/2]

template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > strumpack::ConstDenseMatrixWrapperPtr ( std::size_t  m,
std::size_t  n,
const scalar_t *  D,
std::size_t  ld 
)

Create a DenseMatrixWrapper for a const dense matrix. TODO: we need to find a better way to handle this.

Parameters
m: number of rows of the submatrix (the created wrapper), the view in the matrix D
n: number of columns of the submatrix
D: pointer to a dense matrix. This will not be modified, will not be freed.
ld: leading dimension of D, ld >= m
Returns
unique_ptr with a const DenseMatrixWrapper
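
A minimal sketch of wrapping read-only, application-owned column-major data (leading dimension ld >= m) without copying or taking ownership; the copy into a new DenseMatrix only illustrates that the wrapper can be passed wherever a const DenseMatrix is expected:

    strumpack::DenseMatrix<double>
    copy_view(std::size_t m, std::size_t n, const double* data, std::size_t ld) {
      auto Dw = strumpack::ConstDenseMatrixWrapperPtr(m, n, data, ld);
      strumpack::DenseMatrix<double> B(m, n);
      strumpack::copy(*Dw, B, 0, 0);   // *Dw behaves like a const DenseMatrix<double>
      return B;
    }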

◆ copy() [1/5]

template<typename scalar_from_t , typename scalar_to_t >
void strumpack::copy ( const DenseMatrix< scalar_from_t > &  a,
DenseMatrix< scalar_to_t > &  b,
std::size_t  ib = 0,
std::size_t  jb = 0 
)

Copy matrix a into matrix b at position ib, jb. Should have ib+a.rows() <= b.rows() and jb+a.cols() <= b.cols().

Parameters
a: matrix to copy
b: matrix to copy to
ib: row offset of top left corner of place in b to copy to
jb: column offset of top left corner of place in b to copy to
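
A minimal sketch, copying (and up-casting) a small single precision matrix into a larger double precision one:

    void copy_block() {
      strumpack::DenseMatrix<float>  a(4, 4);
      strumpack::DenseMatrix<double> b(10, 10);
      // place a in b with its top left corner at row 2, column 3 of b
      strumpack::copy(a, b, 2, 3);
    }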

◆ copy() [2/5]

template<typename scalar_t >
void strumpack::copy ( const DenseMatrix< scalar_t > &  a,
scalar_t *  b,
std::size_t  ldb 
)

Copy matrix a into matrix b. Should have ldb >= a.rows(). Matrix b should have been allocated.

Parameters
a: matrix to copy
b: dense matrix to copy to
ldb: leading dimension of b

◆ copy() [3/5]

template<typename scalar_from_t , typename scalar_to_t >
void strumpack::copy ( std::size_t  m,
std::size_t  n,
const DenseMatrix< scalar_from_t > &  a,
std::size_t  ia,
std::size_t  ja,
DenseMatrix< scalar_to_t > &  b,
std::size_t  ib,
std::size_t  jb 
)

Copy submatrix of a at ia,ja of size m,n into b at position ib,jb. Should have ia+m <= a.rows(), ja+n <= a.cols(), ib+m <= b.rows() and jb+n <= b.cols().

Parameters
m: number of rows to copy
n: number of columns to copy
a: DenseMatrix to copy from
ia: row offset of top left corner of submatrix of a to copy
ja: column offset of top left corner of submatrix of a to copy
b: matrix to copy to
ib: row offset of top left corner of place in b to copy to
jb: column offset of top left corner of place in b to copy to

◆ copy() [4/5]

template<typename scalar_t >
void strumpack::copy ( std::size_t  m,
std::size_t  n,
const DistributedMatrix< scalar_t > &  a,
std::size_t  ia,
std::size_t  ja,
DenseMatrix< scalar_t > &  b,
int  dest,
int  context_all 
)

Copy a submatrix of the DistributedMatrix a, at position ia,ja and of size m x n, into the DenseMatrix b on process dest.

◆ copy() [5/5]

template<typename scalar_t >
void strumpack::copy ( std::size_t  m,
std::size_t  n,
const DistributedMatrix< scalar_t > &  a,
std::size_t  ia,
std::size_t  ja,
DistributedMatrix< scalar_t > &  b,
std::size_t  ib,
std::size_t  jb,
int  context_all 
)

Copy a submatrix of a, at position ia,ja and of size m x n, into b at position ib,jb.

◆ default_abs_tol()

template<typename real_t >
real_t strumpack::default_abs_tol ( )
inline

Default absolute tolerance used when solving a linear system. For iterative solvers such as GMRES and BiCGStab, this is the residual tolerance. Exact value depends on the floating point type.

◆ default_rel_tol()

template<typename real_t >
real_t strumpack::default_rel_tol ( )
inline

Default relative tolerance used when solving a linear system. For iterative solvers such as GMRES and BiCGStab, this is the relative residual tolerance. Exact value depends on the floating point type.
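
A minimal sketch of overriding these defaults, assuming SPOptions setters named set_rel_tol and set_abs_tol:

    void tighten_tolerances(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_rel_tol(1e-12);   // relative residual tolerance
      sp.options().set_abs_tol(1e-20);   // absolute residual tolerance
    }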

◆ Euclidean_distance()

template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t strumpack::Euclidean_distance ( std::size_t  d,
const scalar_t *  x,
const scalar_t *  y 
)

Evaluate the Euclidean distance between two points x and y.

Template Parameters
scalar_t: datatype of the points
real_t: real type corresponding to scalar_t
Parameters
d: dimension of the points
x: pointer to first point (stored with stride 1)
y: pointer to second point (stored with stride 1)

◆ Euclidean_distance_squared()

template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t strumpack::Euclidean_distance_squared ( std::size_t  d,
const scalar_t *  x,
const scalar_t *  y 
)

Evaluate the Euclidean distance squared between two points x and y.

Template Parameters
scalar_t: datatype of the points
real_t: real type corresponding to scalar_t
Parameters
d: dimension of the points
x: pointer to first point (stored with stride 1)
y: pointer to second point (stored with stride 1)

◆ eye()

template<typename scalar_t >
DenseMatrix< scalar_t > strumpack::eye ( std::size_t  m,
std::size_t  n 
)

Create an identity matrix of size m x n, i.e., 1 on the main diagonal, zero everywhere else.

Returns
DenseMatrix with rows()==m, cols()==n, operator()(i,i)==1 and operator()(i,j)==0 for i!=j.

◆ gemm()

template<typename scalar_t >
void strumpack::gemm ( Trans  ta,
Trans  tb,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b,
scalar_t  beta,
DenseMatrix< scalar_t > &  c,
int  depth = 0 
)

GEMM, defined for DenseMatrix objects (or DenseMatrixWrapper).

DGEMM performs one of the matrix-matrix operations

C := alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X or op( X ) = X**T,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters
depth: current OpenMP task recursion depth
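
A minimal sketch computing C := 2*A*B sequentially (OpenMP task depth 0); random() and zero() are assumed DenseMatrix helpers used only to give the operands values:

    void gemm_example() {
      std::size_t m = 8, k = 4, n = 6;
      strumpack::DenseMatrix<double> A(m, k), B(k, n), C(m, n);
      A.random();  B.random();  C.zero();
      strumpack::gemm(strumpack::Trans::N, strumpack::Trans::N,
                      2.0, A, B, 0.0, C);
    }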

◆ gemm_flops() [1/2]

template<typename scalar_t >
long long int strumpack::gemm_flops ( Trans  ta,
Trans  tb,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b,
scalar_t  beta 
)

return number of flops for a gemm, given a and b

◆ gemm_flops() [2/2]

template<typename scalar_t >
long long int strumpack::gemm_flops ( Trans  ta,
Trans  tb,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
scalar_t  beta,
const DenseMatrix< scalar_t > &  c 
)

return number of flops for a gemm, given a and c

◆ gemv() [1/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  x,
scalar_t  beta,
DenseMatrix< scalar_t > &  y,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ gemv() [2/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  x,
scalar_t  beta,
scalar_t *  y,
int  incy,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ gemv() [3/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const scalar_t *  x,
int  incx,
scalar_t  beta,
DenseMatrix< scalar_t > &  y,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ gemv() [4/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const scalar_t *  x,
int  incx,
scalar_t  beta,
scalar_t *  y,
int  incy,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ get_clustering_algorithm()

ClusteringAlgorithm strumpack::get_clustering_algorithm ( const std::string &  c)
inline

Return a ClusteringAlgorithm enum based on the input string.

Parameters
c: String, possible values are 'natural', '2means', 'kdtree', 'pca' and 'cobble'. This is case sensitive.

◆ get_description()

std::string strumpack::get_description ( MatchingJob  job)

Return a string describing the matching algorithm.

◆ get_matching() [1/2]

MatchingJob strumpack::get_matching ( int  job)

Convert a job number to a MatchingJob enum type.

◆ get_matching() [2/2]

int strumpack::get_matching ( MatchingJob  job)

Convert a MatchingJob enum type to a job number. Prefer to use the MatchingJob enum instead of the job number.

◆ get_name() [1/3]

std::string strumpack::get_name ( ClusteringAlgorithm  c)
inline

Return a short string with the name of the clustering algorithm.

Parameters
c: Clustering algorithm.
Returns
String with the name of the clustering algorithm.

◆ get_name() [2/3]

std::string strumpack::get_name ( CompressionType  comp)

Return a name/string for the CompressionType.

◆ get_name() [3/3]

std::string strumpack::get_name ( ReorderingStrategy  method)

Return a string with the name of the reordering method.

◆ hconcat()

template<typename scalar_t >
DenseMatrix< scalar_t > strumpack::hconcat ( const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b 
)

Horizontally concatenate 2 DenseMatrix objects a and b: [a b]. Should have a.rows() == b.rows().

Parameters
a: dense matrix, will be placed left
b: dense matrix, will be placed right
Returns
[a b]

◆ ID_row_flops()

template<typename scalar_t >
long long int strumpack::ID_row_flops ( const DenseMatrix< scalar_t > &  a,
int  rank 
)

return number of flops for interpolative decomposition

◆ is_parallel()

bool strumpack::is_parallel ( ReorderingStrategy  method)

Check whether or not the reordering needs to be run in parallel.

◆ LQ_flops()

template<typename scalar_t >
long long int strumpack::LQ_flops ( const DenseMatrix< scalar_t > &  a)

return number of flops for LQ factorization

◆ LU_flops()

template<typename scalar_t >
long long int strumpack::LU_flops ( const DenseMatrix< scalar_t > &  a)

return number of flops for LU factorization

◆ mpi_nprocs()

int strumpack::mpi_nprocs ( MPI_Comm  c = MPI_COMM_WORLD)
inline

Return the number of processes in MPI communicator c, or in MPI_COMM_WORLD if c is not provided.

This routine is deprecated and will be removed soon. Use the MPIComm interface instead.

◆ mpi_rank()

int strumpack::mpi_rank ( MPI_Comm  c = MPI_COMM_WORLD)
inline

Return this process rank in MPI communicator c, or in MPI_COMM_WORLD if c is not provided.

This routine is deprecated and will be removed soon. Use the MPIComm interface instead.

◆ mpi_type()

template<typename T >
MPI_Datatype strumpack::mpi_type ( )

Return the corresponding MPI_Datatype, for simple C++ data types.

Template Parameters
T: C++ type for which to return the corresponding MPI_Datatype
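
A minimal sketch, using mpi_type<T>() instead of hard-coding the MPI datatype:

    #include <vector>

    void bcast_buffer(std::vector<double>& buf, MPI_Comm comm) {
      MPI_Bcast(buf.data(), static_cast<int>(buf.size()),
                strumpack::mpi_type<double>(), 0, comm);
    }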

◆ mpi_type< bool >()

template<>
MPI_Datatype strumpack::mpi_type< bool > ( )
inline

return MPI datatype for C++ bool

◆ mpi_type< char >()

template<>
MPI_Datatype strumpack::mpi_type< char > ( )
inline

return MPI datatype for C++ char

◆ mpi_type< double >()

template<>
MPI_Datatype strumpack::mpi_type< double > ( )
inline

return MPI datatype for C++ double

◆ mpi_type< float >()

template<>
MPI_Datatype strumpack::mpi_type< float > ( )
inline

return MPI datatype for C++ float

◆ mpi_type< int >()

template<>
MPI_Datatype strumpack::mpi_type< int > ( )
inline

return MPI datatype for C++ int

◆ mpi_type< long >()

template<>
MPI_Datatype strumpack::mpi_type< long > ( )
inline

return MPI datatype for C++ long

◆ mpi_type< long long int >()

template<>
MPI_Datatype strumpack::mpi_type< long long int > ( )
inline

return MPI datatype for C++ long long int

◆ mpi_type< std::complex< double > >()

template<>
MPI_Datatype strumpack::mpi_type< std::complex< double > > ( )
inline

return MPI datatype for C++ std::complex<double>

◆ mpi_type< std::complex< float > >()

template<>
MPI_Datatype strumpack::mpi_type< std::complex< float > > ( )
inline

return MPI datatype for C++ std::complex<float>

◆ mpi_type< std::pair< int, int > >()

template<>
MPI_Datatype strumpack::mpi_type< std::pair< int, int > > ( )
inline

return MPI datatype for C++ std::pair<int,int>

◆ mpi_type< std::pair< long int, long int > >()

template<>
MPI_Datatype strumpack::mpi_type< std::pair< long int, long int > > ( )
inline

return MPI datatype for C++ std::pair<long int,long int>

◆ mpi_type< std::pair< long long int, long long int > >()

template<>
MPI_Datatype strumpack::mpi_type< std::pair< long long int, long long int > > ( )
inline

return MPI datatype for C++ std::pair<long long int,long long int>

◆ mpi_type< unsigned long >()

template<>
MPI_Datatype strumpack::mpi_type< unsigned long > ( )
inline

return MPI datatype for C++ unsigned long

◆ norm1_distance()

template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t strumpack::norm1_distance ( std::size_t  d,
const scalar_t *  x,
const scalar_t *  y 
)

Evaluate the 1-norm distance between two points x and y.

Template Parameters
scalar_t: datatype of the points
real_t: real type corresponding to scalar_t
Parameters
d: dimension of the points
x: pointer to first point (stored with stride 1)
y: pointer to second point (stored with stride 1)

◆ operator<<()

std::ostream & strumpack::operator<< ( std::ostream &  os,
const BLACSGrid *  g 
)
inline

Print some info about the BLACS grid to stream os. Just used for debugging.

◆ orthogonalize_flops()

template<typename scalar_t >
long long int strumpack::orthogonalize_flops ( const DenseMatrix< scalar_t > &  a)

return number of flops for orthogonalization

◆ solve_flops()

template<typename scalar_t >
long long int strumpack::solve_flops ( const DenseMatrix< scalar_t > &  b)

return number of flops for solve, using LU factorization

◆ trmm()

template<typename scalar_t >
void strumpack::trmm ( Side  s,
UpLo  ul,
Trans  ta,
Diag  d,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
DenseMatrix< scalar_t > &  b,
int  depth = 0 
)

TRMM performs one of the matrix-matrix operations

B := alpha*op(A)*B, or B := alpha*B*op(A),

where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A**T.

◆ trsm()

template<typename scalar_t >
void strumpack::trsm ( Side  s,
UpLo  ul,
Trans  ta,
Diag  d,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
DenseMatrix< scalar_t > &  b,
int  depth = 0 
)

DTRSM solves one of the matrix equations

op( A )*X = alpha*B, or X*op( A ) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of

op( A ) = A or op( A ) = A**T.

The matrix X is overwritten on B.

◆ trsm_flops()

template<typename scalar_t >
long long int strumpack::trsm_flops ( Side  s,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b 
)

return number of flops for a trsm

◆ trsv()

template<typename scalar_t >
void strumpack::trsv ( UpLo  ul,
Trans  ta,
Diag  d,
const DenseMatrix< scalar_t > &  a,
DenseMatrix< scalar_t > &  b,
int  depth = 0 
)

DTRSV solves one of the systems of equations

A*x = b, or A**T*x = b,

where b and x are n element vectors and A is an n by n unit, or non-unit, upper or lower triangular matrix.

◆ vconcat()

template<typename scalar_t >
DenseMatrix< scalar_t > strumpack::vconcat ( const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b 
)

Vertically concatenate 2 DenseMatrix objects a and b: [a; b]. Should have a.cols() == b.cols().

Parameters
a: dense matrix, will be placed on top
b: dense matrix, will be below
Returns
[a; b]

◆ wait_all()

void strumpack::wait_all ( std::vector< MPIRequest > &  reqs)
inline

Wait on all MPIRequests in a vector. Note that the MPI_Requests are not stored contiguously, and hence the implementation of this routine cannot use MPI_Waitall, but must wait on all requests individually.

If you need MPI_Waitall (or MPI_Waitany), for performance reasons, you should use a vector<MPI_Request>.
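
A minimal sketch using the std::vector<MPI_Request> overload, which stores the requests contiguously, as recommended above for performance; the buffers and the number of peers p are placeholders:

    #include <vector>

    void receive_all(int p, std::vector<std::vector<double>>& rbuf, MPI_Comm comm) {
      std::vector<MPI_Request> reqs(p);
      for (int i = 0; i < p; i++)
        MPI_Irecv(rbuf[i].data(), static_cast<int>(rbuf[i].size()),
                  strumpack::mpi_type<double>(), i, 0, comm, &reqs[i]);
      strumpack::wait_all(reqs);
    }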