strumpack Namespace Reference

Namespaces

namespace  BLR
 
namespace  HODLR
 
namespace  HSS
 
namespace  kernel
 
namespace  random
 
namespace  structured
 

Classes

class  BLACSGrid
 This is a small wrapper class around a BLACS grid and a BLACS context. More...
 
class  CompressedSparseMatrix
 Abstract base class for compressed sparse matrix storage. More...
 
class  CSRGraph
 
class  CSRMatrix
 Class for storing a compressed sparse row matrix (single node). More...
 
class  CSRMatrixMPI
 Block-row distributed compressed sparse row storage. More...
 
class  DenseMatrix
 This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines. More...
 
class  DenseMatrixWrapper
 Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines. However, objects of the DenseMatrixWrapper class do not allocate, own or free any data. More...
 
class  DistributedMatrix
2D block-cyclically distributed matrix, as used by ScaLAPACK. More...
 
class  DistributedMatrixWrapper
 
class  EliminationTree
 
class  EliminationTreeMPIDist
 
class  Equilibration
 
class  ExtendAdd
 
class  FrontalMatrixBLRMPI
 
class  MatchingData
 
class  MatrixReordering
 
class  MatrixReorderingMPI
 
class  MPIComm
 Wrapper class around an MPI_Comm object. More...
 
class  MPIRequest
 Wrapper around an MPI_Request object. More...
 
class  SparseSolver
 SparseSolver is the main sequential or multithreaded sparse solver class. More...
 
class  SparseSolverBase
 SparseSolverBase is the virtual base for both the sequential/multithreaded and distributed sparse solver classes. More...
 
class  SparseSolverMixedPrecision
SparseSolverMixedPrecision uses a lower precision (float) for the factorization/preconditioner and a higher precision (double) for the outer iterative solver. See also LAPACK's dsgesv. More...
 
class  SparseSolverMPIDist
 This is the fully distributed solver. More...
 
class  SPMVBuffers
 
class  SPOptions
 Options for the sparse solver. More...
 

Typedefs

template<typename scalar_t , typename integer_t >
using StrumpackSparseSolverBase = SparseSolverBase< scalar_t, integer_t >
 
template<typename scalar_t , typename integer_t >
using StrumpackSparseSolver = SparseSolver< scalar_t, integer_t >
 
template<typename factor_t , typename refine_t , typename integer_t >
using StrumpackSparseSolverMixedPrecision = SparseSolverMixedPrecision< factor_t, refine_t, integer_t >
 
template<typename scalar_t , typename integer_t >
using StrumpackSparseSolverMPIDist = SparseSolverMPIDist< scalar_t, integer_t >
 

Enumerations

enum class  ProportionalMapping { FLOPS , FACTOR_MEMORY , PEAK_MEMORY }
 
enum class  ReorderingStrategy {
  NATURAL , METIS , PARMETIS , SCOTCH ,
  PTSCOTCH , RCM , GEOMETRIC , AMD ,
  MMD , AND , MLF , SPECTRAL
}
 
enum class  CompressionType {
  NONE , HSS , BLR , HODLR ,
  BLR_HODLR , ZFP_BLR_HODLR , LOSSLESS , LOSSY
}
 
enum class  MatchingJob {
  NONE , MAX_CARDINALITY , MAX_SMALLEST_DIAGONAL , MAX_SMALLEST_DIAGONAL_2 ,
  MAX_DIAGONAL_SUM , MAX_DIAGONAL_PRODUCT_SCALING , COMBBLAS
}
 
enum class  EquilibrationType : char { NONE ='N' , ROW ='R' , COLUMN ='C' , BOTH ='B' }
 
enum class  GramSchmidtType { CLASSICAL , MODIFIED }
 
enum class  KrylovSolver {
  AUTO , DIRECT , REFINE , PREC_GMRES ,
  GMRES , PREC_BICGSTAB , BICGSTAB
}
 
enum class  ReturnCode {
  SUCCESS , MATRIX_NOT_SET , REORDERING_ERROR , ZERO_PIVOT ,
  NO_CONVERGENCE , INACCURATE_INERTIA
}
 Enumeration for the possible return codes. More...
 
enum class  Trans : char { N ='N' , C ='C' , T ='T' }
 
enum class  Side : char { L ='L' , R ='R' }
 
enum class  UpLo : char { U ='U' , L ='L' }
 
enum class  Diag : char { U ='U' , N ='N' }
 
enum class  Jobz : char { N ='N' , V ='V' }
 
enum class  ClusteringAlgorithm {
  NATURAL , TWO_MEANS , KD_TREE , PCA ,
  COBBLE
}
 

Functions

std::string get_name (ReorderingStrategy method)
 
bool is_parallel (ReorderingStrategy method)
 
std::string get_name (CompressionType comp)
 
MatchingJob get_matching (int job)
 
int get_matching (MatchingJob job)
 
std::string get_description (MatchingJob job)
 
template<typename real_t >
real_t default_rel_tol ()
 
template<typename real_t >
real_t default_abs_tol ()
 
template<>
float default_rel_tol ()
 
template<>
float default_abs_tol ()
 
int default_gpu_streams ()
 
std::ostream & operator<< (std::ostream &os, ReturnCode &e)
 
template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrix< cast_t, integer_t > cast_matrix (const CSRMatrix< scalar_t, integer_t > &mat)
 
template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrixMPI< cast_t, integer_t > cast_matrix (const CSRMatrixMPI< scalar_t, integer_t > &mat)
 
Trans c2T (char op)
 
template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > ConstDenseMatrixWrapperPtr (std::size_t m, std::size_t n, const scalar_t *D, std::size_t ld)
 
template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > ConstDenseMatrixWrapperPtr (std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &D, std::size_t i, std::size_t j)
 
template<typename scalar_from_t , typename scalar_to_t >
void copy (std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb)
 
template<typename scalar_from_t , typename scalar_to_t >
void copy (const DenseMatrix< scalar_from_t > &a, DenseMatrix< scalar_to_t > &b, std::size_t ib=0, std::size_t jb=0)
 
template<typename scalar_t >
void copy (const DenseMatrix< scalar_t > &a, scalar_t *b, std::size_t ldb)
 
template<typename scalar_t >
DenseMatrix< scalar_t > vconcat (const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
DenseMatrix< scalar_t > hconcat (const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
DenseMatrix< scalar_t > eye (std::size_t m, std::size_t n)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *b, int ldb, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, scalar_t *c, int ldc, int depth=0)
 
template<typename scalar_t >
void trmm (Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
 
template<typename scalar_t >
void trsm (Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
 
template<typename scalar_t >
void trsv (UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *x, int incx, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, scalar_t *y, int incy, int depth=0)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *x, int incx, scalar_t beta, scalar_t *y, int incy, int depth=0)
 
template<typename scalar_t >
long long int LU_flops (const DenseMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int solve_flops (const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int LQ_flops (const DenseMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int ID_row_flops (const DenseMatrix< scalar_t > &a, int rank)
 
template<typename scalar_t >
long long int trsm_flops (Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta)
 
template<typename scalar_t >
long long int gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, scalar_t beta, const DenseMatrix< scalar_t > &c)
 
template<typename scalar_t >
long long int orthogonalize_flops (const DenseMatrix< scalar_t > &a)
 
template<typename scalar_t , typename cast_t >
DenseMatrix< cast_t > cast_matrix (const DenseMatrix< scalar_t > &mat)
 
template<typename scalar_t >
void copy (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_t > &b, int dest, int context_all)
 
template<typename scalar_t >
void copy (std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &a, int src, DistributedMatrix< scalar_t > &b, std::size_t ib, std::size_t jb, int context_all)
 
template<typename scalar_t >
void copy (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &a, std::size_t ia, std::size_t ja, DistributedMatrix< scalar_t > &b, std::size_t ib, std::size_t jb, int context_all)
 
template<typename scalar_t >
long long int LU_flops (const DistributedMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int solve_flops (const DistributedMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int LQ_flops (const DistributedMatrix< scalar_t > &a)
 
template<typename scalar_t >
long long int ID_row_flops (const DistributedMatrix< scalar_t > &a, int rank)
 
template<typename scalar_t >
long long int trsm_flops (Side s, scalar_t alpha, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b)
 
template<typename scalar_t >
long long int gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, scalar_t beta)
 
template<typename scalar_t >
long long int gemv_flops (Trans ta, const DistributedMatrix< scalar_t > &a, scalar_t alpha, scalar_t beta)
 
template<typename scalar_t >
long long int orthogonalize_flops (const DistributedMatrix< scalar_t > &a)
 
template<typename scalar_t >
std::unique_ptr< const DistributedMatrixWrapper< scalar_t > > ConstDistributedMatrixWrapperPtr (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &D, std::size_t i, std::size_t j)
 
template<typename scalar_t >
void gemm (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix< scalar_t > &A, const DistributedMatrix< scalar_t > &B, scalar_t beta, DistributedMatrix< scalar_t > &C)
 
template<typename scalar_t >
void trsm (Side s, UpLo u, Trans ta, Diag d, scalar_t alpha, const DistributedMatrix< scalar_t > &A, DistributedMatrix< scalar_t > &B)
 
template<typename scalar_t >
void trsv (UpLo ul, Trans ta, Diag d, const DistributedMatrix< scalar_t > &A, DistributedMatrix< scalar_t > &B)
 
template<typename scalar_t >
void gemv (Trans ta, scalar_t alpha, const DistributedMatrix< scalar_t > &A, const DistributedMatrix< scalar_t > &X, scalar_t beta, DistributedMatrix< scalar_t > &Y)
 
template<typename scalar_t >
DistributedMatrix< scalar_t > vconcat (int cols, int arows, int brows, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, const BLACSGrid *gnew, int cxt_all)
 
template<typename scalar_t >
void subgrid_copy_to_buffers (const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, int p0, int npr, int npc, std::vector< std::vector< scalar_t > > &sbuf)
 
template<typename scalar_t >
void subproc_copy_to_buffers (const DenseMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, int p0, int npr, int npc, std::vector< std::vector< scalar_t > > &sbuf)
 
template<typename scalar_t >
void subgrid_add_from_buffers (const BLACSGrid *subg, int master, DistributedMatrix< scalar_t > &b, std::vector< scalar_t * > &pbuf)
 
std::ostream & operator<< (std::ostream &os, const BLACSGrid *g)
 
template<typename T >
MPI_Datatype mpi_type ()
 
template<>
MPI_Datatype mpi_type< char > ()
 
template<>
MPI_Datatype mpi_type< bool > ()
 
template<>
MPI_Datatype mpi_type< int > ()
 
template<>
MPI_Datatype mpi_type< long > ()
 
template<>
MPI_Datatype mpi_type< unsigned long > ()
 
template<>
MPI_Datatype mpi_type< long long int > ()
 
template<>
MPI_Datatype mpi_type< float > ()
 
template<>
MPI_Datatype mpi_type< double > ()
 
template<>
MPI_Datatype mpi_type< std::complex< float > > ()
 
template<>
MPI_Datatype mpi_type< std::complex< double > > ()
 
template<>
MPI_Datatype mpi_type< std::pair< int, int > > ()
 
template<>
MPI_Datatype mpi_type< std::pair< long int, long int > > ()
 
template<>
MPI_Datatype mpi_type< std::pair< long long int, long long int > > ()
 
void wait_all (std::vector< MPIRequest > &reqs)
 
void wait_all (std::vector< MPI_Request > &reqs)
 
int mpi_rank (MPI_Comm c=MPI_COMM_WORLD)
 
int mpi_nprocs (MPI_Comm c=MPI_COMM_WORLD)
 
std::string get_name (ClusteringAlgorithm c)
 
ClusteringAlgorithm get_clustering_algorithm (const std::string &c)
 
template<typename T >
void pca_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, int *perm)
 
template<typename T >
structured::ClusterTree recursive_pca (DenseMatrix< T > &p, std::size_t cluster_size, int *perm)
 
template<typename T >
void cobble_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, int *perm)
 
template<typename T >
structured::ClusterTree recursive_cobble (DenseMatrix< T > &p, std::size_t cluster_size, int *perm)
 
template<typename T >
structured::ClusterTree recursive_2_means (DenseMatrix< T > &p, std::size_t cluster_size, int *perm, std::mt19937 &generator)
 
template<typename T >
void kd_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, std::size_t cluster_size, int *perm)
 
template<typename T >
structured::ClusterTree recursive_kd (DenseMatrix< T > &p, std::size_t cluster_size, int *perm)
 
template<typename scalar_t >
structured::ClusterTree binary_tree_clustering (ClusteringAlgorithm algo, DenseMatrix< scalar_t > &p, std::vector< int > &perm, std::size_t cluster_size)
 
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t Euclidean_distance_squared (std::size_t d, const scalar_t *x, const scalar_t *y)
 
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t Euclidean_distance (std::size_t d, const scalar_t *x, const scalar_t *y)
 
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t norm1_distance (std::size_t d, const scalar_t *x, const scalar_t *y)
 

Detailed Description

All of STRUMPACK is contained in the strumpack namespace.
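
As a quick orientation, the sketch below shows how the pieces documented on this page fit together for a basic solve with the sequential/multithreaded SparseSolver. This is only a minimal sketch: the function name solve_sketch, the CSR arrays, right-hand side and solution vector are placeholders, and the option value is illustrative.

    #include "StrumpackSparseSolver.hpp"

    // Minimal sketch: solve A x = b for a CSR matrix given by
    // (n, row_ptr, col_ind, values); b and x are length-n arrays.
    void solve_sketch(int n, const int* row_ptr, const int* col_ind,
                      const double* values, const double* b, double* x) {
      strumpack::SparseSolver<double,int> sp;
      sp.options().set_rel_tol(1e-10);   // optional: tighten the relative tolerance
      sp.set_csr_matrix(n, row_ptr, col_ind, values);
      sp.reorder();                      // fill-reducing reordering (ReorderingStrategy)
      sp.factor();                       // multifrontal factorization
      sp.solve(b, x);                    // solve, refined by the outer KrylovSolver
    }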

Enumeration Type Documentation

◆ ClusteringAlgorithm

enum class strumpack::ClusteringAlgorithm
strong

Enumeration of clustering codes to order input data and create a binary cluster tree.

Enumerator
NATURAL 

No reordering, split evenly

TWO_MEANS 

Defines a binary tree using k(=2)-means

KD_TREE 

Simple kd-tree clustering

PCA 

Cluster based on the principal component

COBBLE 

Cobble partitioning

◆ CompressionType

enum class strumpack::CompressionType
strong

Enumeration of rank-structured data formats, which can be used for compression within the sparse solver.

Enumerator
NONE 

No compression, purely direct solver

HSS 

HSS compression of frontal matrices

BLR 

Block low-rank compression of fronts

HODLR 

Hierarchically Off-diagonal Low-Rank compression of frontal matrices

BLR_HODLR 

Block low-rank compression of medium fronts and Hierarchically Off-diagonal Low-Rank compression of large fronts

ZFP_BLR_HODLR 

ZFP compression for small fronts, Block low-rank compression of medium fronts and Hierarchically Off-diagonal Low-Rank compression of large fronts

LOSSLESS 

Lossless compression

LOSSY 

Lossy compression
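
A minimal sketch of selecting one of these formats through the solver options, assuming an SPOptions setter named set_compression (the exact setter name may differ between STRUMPACK versions):

    // Compress the frontal matrices with block low-rank (BLR) approximations,
    // turning the direct factorization into a rank-structured preconditioner.
    void enable_blr(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_compression(strumpack::CompressionType::BLR);
    }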

◆ Diag

enum class strumpack::Diag : char
strong

Whether or not the matrix has a unit diagonal.

Enumerator
U 

Unit diagonal

N 

Non-unit diagonal

◆ GramSchmidtType

enum class strumpack::GramSchmidtType
strong

Type of Gram-Schmidt orthogonalization used in GMRes.

Enumerator
CLASSICAL 

Classical Gram-Schmidt is faster, more scalable.

MODIFIED 

Modified Gram-Schmidt is slower, but stable.

◆ Jobz

enum class strumpack::Jobz : char
strong

Job for eigenvalue/vector computations

Enumerator
N 

Compute eigenvalues only

V 

Compute eigenvalues and eigenvectors

◆ KrylovSolver

enum class strumpack::KrylovSolver
strong

Type of outer iterative (Krylov) solver.

Enumerator
AUTO 

Use iterative refinement if no compression is used, otherwise use GMRes.

DIRECT 

No outer iterative solver, just a single application of the multifrontal solver.

REFINE 

Iterative refinement.

PREC_GMRES 

Preconditioned GMRes. The preconditioner is the (approx) multifrontal solver.

GMRES 

Un-preconditioned GMRes (mainly for testing).

PREC_BICGSTAB 

Preconditioned BiCGStab. The preconditioner is the (approx) multifrontal solver.

BICGSTAB 

Un-preconditioned BiCGStab (mainly for testing).
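
A minimal sketch of selecting the outer Krylov solver, assuming SPOptions setters named set_Krylov_solver and set_maxit:

    // Wrap the (possibly compressed) multifrontal factorization in GMRes and
    // cap the number of outer iterations.
    void use_prec_gmres(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_Krylov_solver(strumpack::KrylovSolver::PREC_GMRES);
      sp.options().set_maxit(200);
    }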

◆ MatchingJob

enum class strumpack::MatchingJob
strong

Enumeration of possible matching algorithms, used for permutation of the sparse matrix to improve stability.

Enumerator
NONE 

Don't do anything

MAX_CARDINALITY 

Maximum cardinality

MAX_SMALLEST_DIAGONAL 

Maximum smallest diagonal value

MAX_SMALLEST_DIAGONAL_2 

Same as MAX_SMALLEST_DIAGONAL, but using a different algorithm

MAX_DIAGONAL_SUM 

Maximum sum of diagonal values

MAX_DIAGONAL_PRODUCT_SCALING 

Maximum product of diagonal values and row and column scaling

COMBBLAS 

Use AWPM from CombBLAS
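
A minimal sketch of enabling a matching job before the factorization, assuming an SPOptions setter named set_matching:

    // Maximize the product of the diagonal entries and apply the corresponding
    // row/column scaling, often a robust choice for unsymmetric problems.
    void enable_matching(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_matching(
        strumpack::MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING);
    }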

◆ ProportionalMapping

enum class strumpack::ProportionalMapping
strong

Enumeration of strategies for proportional mapping of the multifrontal tree.

Enumerator
FLOPS 

Balance flops, optimize runtime

FACTOR_MEMORY 

Balance final memory for LU factors

PEAK_MEMORY 

Balance peak memory usage during factorization

◆ ReorderingStrategy

enum class strumpack::ReorderingStrategy
strong

Enumeration of possible sparse fill-reducing orderings.

Enumerator
NATURAL 

Do not reorder the system

METIS 

Use Metis nested-dissection reordering

PARMETIS 

Use ParMetis nested-dissection reordering

SCOTCH 

Use Scotch nested-dissection reordering

PTSCOTCH 

Use PT-Scotch nested-dissection reordering

RCM 

Use RCM reordering

GEOMETRIC 

A simple geometric nested dissection code that only works for regular meshes. (see Sp::reorder)

AMD 

Approximate minimum degree

MMD 

Multiple minimum degree

AND 

Nested dissection

MLF 

Minimum local fill

SPECTRAL 

Spectral nested dissection
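
A minimal sketch of choosing the fill-reducing ordering, assuming an SPOptions setter named set_reordering_method; get_name and is_parallel (declared above) can be used for logging and for checking whether the ordering code runs in parallel:

    #include <string>

    void use_metis(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_reordering_method(strumpack::ReorderingStrategy::METIS);
      // e.g. for logging:
      std::string name = strumpack::get_name(strumpack::ReorderingStrategy::METIS);
      bool parallel_ordering = strumpack::is_parallel(strumpack::ReorderingStrategy::METIS);
    }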

◆ ReturnCode

enum class strumpack::ReturnCode
strong

Enumeration for the possible return codes.

Enumerator
SUCCESS 

Operation completed successfully.

MATRIX_NOT_SET 

The input matrix was not set.

REORDERING_ERROR 

The matrix reordering failed.

ZERO_PIVOT 

A zero pivot was encountered.

NO_CONVERGENCE 

The iterative solver did not converge.

INACCURATE_INERTIA 

Inertia could not be computed.
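
A minimal sketch of checking these codes: factor() and solve() on the solver classes return a ReturnCode, and the operator<< declared above can be used to print it.

    #include <iostream>

    bool try_factor(strumpack::SparseSolver<double,int>& sp) {
      strumpack::ReturnCode err = sp.factor();
      if (err != strumpack::ReturnCode::SUCCESS)
        std::cerr << "factorization failed: " << err << std::endl;
      return err == strumpack::ReturnCode::SUCCESS;
    }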

◆ Side

enum class strumpack::Side : char
strong

Which side to apply the operation on, as used by several BLAS routines.

Enumerator
L 

Left side

R 

Right side

◆ Trans

enum class strumpack::Trans : char
strong

Operation to perform on the matrix, as used by several BLAS routines.

Enumerator
N 

No transpose

C 

Complex conjugate

T 

Transpose

◆ UpLo

enum class strumpack::UpLo : char
strong

Which triangular part of the matrix to consider, as used by several BLAS routines.

Enumerator
U 

Upper triangle

L 

Lower triangle

Function Documentation

◆ binary_tree_clustering()

template<typename scalar_t >
structured::ClusterTree strumpack::binary_tree_clustering ( ClusteringAlgorithm  algo,
DenseMatrix< scalar_t > &  p,
std::vector< int > &  perm,
std::size_t  cluster_size 
)

Reorder the input data and define a (binary) cluster tree.

Parameters
algo: ClusteringAlgorithm to use
p: Input data set. This is a d x n matrix (column major): d is the number of features, n is the number of data points, so the data is stored point after point. This will be reordered according to perm.
perm: The permutation. The permutation uses 1-based indexing, so it can be used with LAPACK permutation routines (such as DenseMatrix::lapmt). This will be resized to the correct size, i.e., n == p.cols().
cluster_size: Stop partitioning when this cluster_size is reached. This corresponds to the HSS/HODLR leaf size.
Returns
A structured::ClusterTree defined by the (recursive) clustering.
See also
strumpack::DenseMatrix::lapmt, get_clustering_algorithm, get_name(ClusteringAlgorithm)
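
A minimal sketch of the call (the function name cluster_points and the leaf size of 64 are illustrative):

    #include <vector>

    // Cluster n points in d dimensions, stored one point per column of p,
    // into a binary tree with leaves of at most 64 points.
    strumpack::structured::ClusterTree
    cluster_points(strumpack::DenseMatrix<double>& p) {
      std::vector<int> perm;   // resized by the call, 1-based permutation
      auto tree = strumpack::binary_tree_clustering(
        strumpack::ClusteringAlgorithm::KD_TREE, p, perm, 64);
      // p has been reordered in place; perm can be applied to associated labels.
      return tree;
    }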

◆ cast_matrix() [1/3]

template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrix< cast_t, integer_t > strumpack::cast_matrix ( const CSRMatrix< scalar_t, integer_t > &  mat)

Creates a copy of a matrix templated on cast_t and integer_t. Original matrix is unmodified.

Template Parameters
scalar_t: value type of the original matrix
integer_t: integer type of the original matrix
cast_t: value type of the returned matrix
Parameters
mat: const reference to the input CSRMatrix<scalar_t,integer_t>.

◆ cast_matrix() [2/3]

template<typename scalar_t , typename integer_t , typename cast_t >
CSRMatrixMPI< cast_t, integer_t > strumpack::cast_matrix ( const CSRMatrixMPI< scalar_t, integer_t > &  mat)

Creates a copy of a matrix templated on cast_t and integer_t. Original matrix is unmodified.

Template Parameters
scalar_t: value type of the original matrix
integer_t: integer type of the original matrix
cast_t: value type of the returned matrix
Parameters
mat: const reference to the input CSRMatrixMPI<scalar_t,integer_t>.

◆ cast_matrix() [3/3]

template<typename scalar_t , typename cast_t >
DenseMatrix< cast_t > strumpack::cast_matrix ( const DenseMatrix< scalar_t > &  mat)

Creates a copy of a matrix templated on cast_t. Original matrix is unmodified.

Template Parameters
scalar_t: value type of the original matrix
cast_t: value type of the returned matrix
Parameters
mat: const reference to the input DenseMatrix<scalar_t>.
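
For example, a double precision matrix can be cast to single precision, e.g. to feed the float factorization of the mixed-precision solvers; a minimal sketch:

    strumpack::DenseMatrix<float>
    to_single(const strumpack::DenseMatrix<double>& A) {
      return strumpack::cast_matrix<double,float>(A);   // A itself is unchanged
    }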

◆ ConstDenseMatrixWrapperPtr() [1/2]

template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > strumpack::ConstDenseMatrixWrapperPtr ( std::size_t  m,
std::size_t  n,
const DenseMatrix< scalar_t > &  D,
std::size_t  i,
std::size_t  j 
)

Create a DenseMatrixWrapper for a const dense matrix. TODO: we need to find a better way to handle this. Should have i+m <= D.rows() and j+n <= D.cols().

Parameters
m: number of rows of the submatrix (the created wrapper), the view in the matrix D
n: number of columns of the submatrix
D: const reference to a dense matrix. This will not be modified or freed.
i: row offset in the matrix D, denoting the top left corner of the submatrix to be created
j: column offset in the matrix D, denoting the top left corner of the submatrix to be created
Returns
unique_ptr with a const DenseMatrixWrapper

◆ ConstDenseMatrixWrapperPtr() [2/2]

template<typename scalar_t >
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > strumpack::ConstDenseMatrixWrapperPtr ( std::size_t  m,
std::size_t  n,
const scalar_t *  D,
std::size_t  ld 
)

Create a DenseMatrixWrapper for a const dense matrix. TODO: we need to find a better way to handle this.

Parameters
m: number of rows of the submatrix (the created wrapper), the view in the matrix D
n: number of columns of the submatrix
D: pointer to a dense matrix. This will not be modified, will not be freed.
ld: leading dimension of D, ld >= m
Returns
unique_ptr with a const DenseMatrixWrapper
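
A minimal sketch of wrapping read-only, application-owned column-major data (leading dimension ld >= m) without copying or taking ownership; the copy into a new DenseMatrix only illustrates that the wrapper can be passed wherever a const DenseMatrix is expected:

    strumpack::DenseMatrix<double>
    copy_view(std::size_t m, std::size_t n, const double* data, std::size_t ld) {
      auto Dw = strumpack::ConstDenseMatrixWrapperPtr(m, n, data, ld);
      strumpack::DenseMatrix<double> B(m, n);
      strumpack::copy(*Dw, B, 0, 0);   // *Dw behaves like a const DenseMatrix<double>
      return B;
    }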

◆ copy() [1/5]

template<typename scalar_from_t , typename scalar_to_t >
void strumpack::copy ( const DenseMatrix< scalar_from_t > &  a,
DenseMatrix< scalar_to_t > &  b,
std::size_t  ib = 0,
std::size_t  jb = 0 
)

Copy matrix a into matrix b at position ib, jb. Should have ib+a.rows() <= b.rows() and jb+a.cols() <= b.cols().

Parameters
a: matrix to copy
b: matrix to copy to
ib: row offset of top left corner of place in b to copy to
jb: column offset of top left corner of place in b to copy to
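
A minimal sketch, copying (and up-casting) a small single precision matrix into a larger double precision one:

    void copy_block() {
      strumpack::DenseMatrix<float>  a(4, 4);
      strumpack::DenseMatrix<double> b(10, 10);
      // place a in b with its top left corner at row 2, column 3 of b
      strumpack::copy(a, b, 2, 3);
    }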

◆ copy() [2/5]

template<typename scalar_t >
void strumpack::copy ( const DenseMatrix< scalar_t > &  a,
scalar_t *  b,
std::size_t  ldb 
)

Copy matrix a into matrix b. Should have ldb >= a.rows(). Matrix b should have been allocated.

Parameters
a: matrix to copy
b: dense matrix to copy to
ldb: leading dimension of b

◆ copy() [3/5]

template<typename scalar_from_t , typename scalar_to_t >
void strumpack::copy ( std::size_t  m,
std::size_t  n,
const DenseMatrix< scalar_from_t > &  a,
std::size_t  ia,
std::size_t  ja,
DenseMatrix< scalar_to_t > &  b,
std::size_t  ib,
std::size_t  jb 
)

Copy submatrix of a at ia,ja of size m,n into b at position ib,jb. Should have ia+m <= a.rows(), ja+n <= a.cols(), ib+m <= b.rows() and jb+n <= b.cols().

Parameters
m: number of rows to copy
n: number of columns to copy
a: DenseMatrix to copy from
ia: row offset of top left corner of submatrix of a to copy
ja: column offset of top left corner of submatrix of a to copy
b: matrix to copy to
ib: row offset of top left corner of place in b to copy to
jb: column offset of top left corner of place in b to copy to

◆ copy() [4/5]

template<typename scalar_t >
void strumpack::copy ( std::size_t  m,
std::size_t  n,
const DistributedMatrix< scalar_t > &  a,
std::size_t  ia,
std::size_t  ja,
DenseMatrix< scalar_t > &  b,
int  dest,
int  context_all 
)

Copy a submatrix of the DistributedMatrix a, at position ia,ja and of size m x n, into the DenseMatrix b on process dest.

◆ copy() [5/5]

template<typename scalar_t >
void strumpack::copy ( std::size_t  m,
std::size_t  n,
const DistributedMatrix< scalar_t > &  a,
std::size_t  ia,
std::size_t  ja,
DistributedMatrix< scalar_t > &  b,
std::size_t  ib,
std::size_t  jb,
int  context_all 
)

Copy a submatrix of a, at position ia,ja and of size m x n, into b at position ib,jb.

◆ default_abs_tol()

template<typename real_t >
real_t strumpack::default_abs_tol ( )
inline

Default absolute tolerance used when solving a linear system. For iterative solvers such as GMRES and BiCGStab, this is the residual tolerance. Exact value depends on the floating point type.

◆ default_rel_tol()

template<typename real_t >
real_t strumpack::default_rel_tol ( )
inline

Default relative tolerance used when solving a linear system. For iterative solvers such as GMRES and BiCGStab, this is the relative residual tolerance. Exact value depends on the floating point type.
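
A minimal sketch of overriding these defaults, assuming SPOptions setters named set_rel_tol and set_abs_tol:

    void tighten_tolerances(strumpack::SparseSolver<double,int>& sp) {
      sp.options().set_rel_tol(1e-12);   // relative residual tolerance
      sp.options().set_abs_tol(1e-20);   // absolute residual tolerance
    }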

◆ Euclidean_distance()

template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t strumpack::Euclidean_distance ( std::size_t  d,
const scalar_t *  x,
const scalar_t *  y 
)

Evaluate the Euclidean distance between two points x and y.

Template Parameters
scalar_t: datatype of the points
real_t: real type corresponding to scalar_t
Parameters
d: dimension of the points
x: pointer to first point (stored with stride 1)
y: pointer to second point (stored with stride 1)

◆ Euclidean_distance_squared()

template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t strumpack::Euclidean_distance_squared ( std::size_t  d,
const scalar_t *  x,
const scalar_t *  y 
)

Evaluate the Euclidean distance squared between two points x and y.

Template Parameters
scalar_t: datatype of the points
real_t: real type corresponding to scalar_t
Parameters
d: dimension of the points
x: pointer to first point (stored with stride 1)
y: pointer to second point (stored with stride 1)

◆ eye()

template<typename scalar_t >
DenseMatrix< scalar_t > strumpack::eye ( std::size_t  m,
std::size_t  n 
)

Create an identity matrix of size m x n, i.e., 1 on the main diagonal, zero everywhere else.

Returns
DenseMatrix with rows()==m, cols()==n, operator()(i,i)==1 and operator()(i,j)==0 for i!=j.

◆ gemm()

template<typename scalar_t >
void strumpack::gemm ( Trans  ta,
Trans  tb,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b,
scalar_t  beta,
DenseMatrix< scalar_t > &  c,
int  depth = 0 
)

GEMM, defined for DenseMatrix objects (or DenseMatrixWrapper).

DGEMM performs one of the matrix-matrix operations

C := alpha*op( A )*op( B ) + beta*C,

where op( X ) is one of

op( X ) = X or op( X ) = X**T,

alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.

Parameters
depth: current OpenMP task recursion depth
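
A minimal sketch computing C := 2*A*B sequentially (OpenMP task depth 0); random() and zero() are assumed DenseMatrix helpers used only to give the operands values:

    void gemm_example() {
      std::size_t m = 8, k = 4, n = 6;
      strumpack::DenseMatrix<double> A(m, k), B(k, n), C(m, n);
      A.random();  B.random();  C.zero();
      strumpack::gemm(strumpack::Trans::N, strumpack::Trans::N,
                      2.0, A, B, 0.0, C);
    }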

◆ gemm_flops() [1/2]

template<typename scalar_t >
long long int strumpack::gemm_flops ( Trans  ta,
Trans  tb,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b,
scalar_t  beta 
)

return number of flops for a gemm, given a and b

◆ gemm_flops() [2/2]

template<typename scalar_t >
long long int strumpack::gemm_flops ( Trans  ta,
Trans  tb,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
scalar_t  beta,
const DenseMatrix< scalar_t > &  c 
)

return number of flops for a gemm, given a and c

◆ gemv() [1/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  x,
scalar_t  beta,
DenseMatrix< scalar_t > &  y,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ gemv() [2/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  x,
scalar_t  beta,
scalar_t *  y,
int  incy,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ gemv() [3/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const scalar_t *  x,
int  incx,
scalar_t  beta,
DenseMatrix< scalar_t > &  y,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ gemv() [4/4]

template<typename scalar_t >
void strumpack::gemv ( Trans  ta,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const scalar_t *  x,
int  incx,
scalar_t  beta,
scalar_t *  y,
int  incy,
int  depth = 0 
)

DGEMV performs one of the matrix-vector operations

y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,

where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

◆ get_clustering_algorithm()

ClusteringAlgorithm strumpack::get_clustering_algorithm ( const std::string &  c)
inline

Return a ClusteringAlgorithm enum based on the input string.

Parameters
c: String, possible values are 'natural', '2means', 'kdtree', 'pca' and 'cobble'. This is case sensitive.

◆ get_description()

std::string strumpack::get_description ( MatchingJob  job)

Return a string describing the matching algorithm.

◆ get_matching() [1/2]

MatchingJob strumpack::get_matching ( int  job)

Convert a job number to a MatchingJob enum type.

◆ get_matching() [2/2]

int strumpack::get_matching ( MatchingJob  job)

Convert a MatchingJob enum type to a job number. Prefer to use the MatchingJob enum instead of the job number.

◆ get_name() [1/3]

std::string strumpack::get_name ( ClusteringAlgorithm  c)
inline

Return a short string with the name of the clustering algorithm.

Parameters
c: Clustering algorithm.
Returns
String with the name of the clustering algorithm.

◆ get_name() [2/3]

std::string strumpack::get_name ( CompressionType  comp)

Return a name/string for the CompressionType.

◆ get_name() [3/3]

std::string strumpack::get_name ( ReorderingStrategy  method)

Return a string with the name of the reordering method.

◆ hconcat()

template<typename scalar_t >
DenseMatrix< scalar_t > strumpack::hconcat ( const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b 
)

Horizontally concatenate 2 DenseMatrix objects a and b: [a b]. Should have a.rows() == b.rows().

Parameters
a: dense matrix, will be placed left
b: dense matrix, will be placed right
Returns
[a b]

◆ ID_row_flops()

template<typename scalar_t >
long long int strumpack::ID_row_flops ( const DenseMatrix< scalar_t > &  a,
int  rank 
)

return number of flops for interpolative decomposition

◆ is_parallel()

bool strumpack::is_parallel ( ReorderingStrategy  method)

Check whether or not the reordering needs to be run in parallel.

◆ LQ_flops()

template<typename scalar_t >
long long int strumpack::LQ_flops ( const DenseMatrix< scalar_t > &  a)

return number of flops for LQ factorization

◆ LU_flops()

template<typename scalar_t >
long long int strumpack::LU_flops ( const DenseMatrix< scalar_t > &  a)

return number of flops for LU factorization

◆ mpi_nprocs()

int strumpack::mpi_nprocs ( MPI_Comm  c = MPI_COMM_WORLD)
inline

Return the number of processes in MPI communicator c, or in MPI_COMM_WORLD if c is not provided.

This routine is deprecated and will be removed soon. Use the MPIComm interface instead.

◆ mpi_rank()

int strumpack::mpi_rank ( MPI_Comm  c = MPI_COMM_WORLD)
inline

Return this process rank in MPI communicator c, or in MPI_COMM_WORLD if c is not provided.

This routine is deprecated and will be removed soon. Use the MPIComm interface instead.

◆ mpi_type()

template<typename T >
MPI_Datatype strumpack::mpi_type ( )

Return the corresponding MPI_Datatype, for simple C++ data types.

Template Parameters
T: C++ type for which to return the corresponding MPI_Datatype
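
A minimal sketch, using mpi_type<T>() instead of hard-coding the MPI datatype:

    #include <vector>

    void bcast_buffer(std::vector<double>& buf, MPI_Comm comm) {
      MPI_Bcast(buf.data(), static_cast<int>(buf.size()),
                strumpack::mpi_type<double>(), 0, comm);
    }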

◆ mpi_type< bool >()

template<>
MPI_Datatype strumpack::mpi_type< bool > ( )
inline

return MPI datatype for C++ bool

◆ mpi_type< char >()

template<>
MPI_Datatype strumpack::mpi_type< char > ( )
inline

return MPI datatype for C++ char

◆ mpi_type< double >()

template<>
MPI_Datatype strumpack::mpi_type< double > ( )
inline

return MPI datatype for C++ double

◆ mpi_type< float >()

template<>
MPI_Datatype strumpack::mpi_type< float > ( )
inline

return MPI datatype for C++ float

◆ mpi_type< int >()

template<>
MPI_Datatype strumpack::mpi_type< int > ( )
inline

return MPI datatype for C++ int

◆ mpi_type< long >()

template<>
MPI_Datatype strumpack::mpi_type< long > ( )
inline

return MPI datatype for C++ long

◆ mpi_type< long long int >()

template<>
MPI_Datatype strumpack::mpi_type< long long int > ( )
inline

return MPI datatype for C++ long long int

◆ mpi_type< std::complex< double > >()

template<>
MPI_Datatype strumpack::mpi_type< std::complex< double > > ( )
inline

return MPI datatype for C++ std::complex<double>

◆ mpi_type< std::complex< float > >()

template<>
MPI_Datatype strumpack::mpi_type< std::complex< float > > ( )
inline

return MPI datatype for C++ std::complex<float>

◆ mpi_type< std::pair< int, int > >()

template<>
MPI_Datatype strumpack::mpi_type< std::pair< int, int > > ( )
inline

return MPI datatype for C++ std::pair<int,int>

◆ mpi_type< std::pair< long int, long int > >()

template<>
MPI_Datatype strumpack::mpi_type< std::pair< long int, long int > > ( )
inline

return MPI datatype for C++ std::pair<long int,long int>

◆ mpi_type< std::pair< long long int, long long int > >()

template<>
MPI_Datatype strumpack::mpi_type< std::pair< long long int, long long int > > ( )
inline

return MPI datatype for C++ std::pair<long long int,long long int>

◆ mpi_type< unsigned long >()

template<>
MPI_Datatype strumpack::mpi_type< unsigned long > ( )
inline

return MPI datatype for C++ unsigned long

◆ norm1_distance()

template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type>
real_t strumpack::norm1_distance ( std::size_t  d,
const scalar_t *  x,
const scalar_t *  y 
)

Evaluate the 1-norm distance between two points x and y.

Template Parameters
scalar_t: datatype of the points
real_t: real type corresponding to scalar_t
Parameters
d: dimension of the points
x: pointer to first point (stored with stride 1)
y: pointer to second point (stored with stride 1)

◆ operator<<()

std::ostream & strumpack::operator<< ( std::ostream &  os,
const BLACSGrid *  g 
)
inline

Print some info about the BLACS grid to stream os. Just used for debugging.

◆ orthogonalize_flops()

template<typename scalar_t >
long long int strumpack::orthogonalize_flops ( const DenseMatrix< scalar_t > &  a)

return number of flops for orthogonalization

◆ solve_flops()

template<typename scalar_t >
long long int strumpack::solve_flops ( const DenseMatrix< scalar_t > &  b)

return number of flops for solve, using LU factorization

◆ trmm()

template<typename scalar_t >
void strumpack::trmm ( Side  s,
UpLo  ul,
Trans  ta,
Diag  d,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
DenseMatrix< scalar_t > &  b,
int  depth = 0 
)

TRMM performs one of the matrix-matrix operations

B := alpha*op(A)*B, or B := alpha*B*op(A),

where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A**T.

◆ trsm()

template<typename scalar_t >
void strumpack::trsm ( Side  s,
UpLo  ul,
Trans  ta,
Diag  d,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
DenseMatrix< scalar_t > &  b,
int  depth = 0 
)

DTRSM solves one of the matrix equations

op( A )*X = alpha*B, or X*op( A ) = alpha*B,

where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of

op( A ) = A or op( A ) = A**T.

The matrix X is overwritten on B.

◆ trsm_flops()

template<typename scalar_t >
long long int strumpack::trsm_flops ( Side  s,
scalar_t  alpha,
const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b 
)

return number of flops for a trsm

◆ trsv()

template<typename scalar_t >
void strumpack::trsv ( UpLo  ul,
Trans  ta,
Diag  d,
const DenseMatrix< scalar_t > &  a,
DenseMatrix< scalar_t > &  b,
int  depth = 0 
)

DTRSV solves one of the systems of equations

A*x = b, or A**T*x = b,

where b and x are n element vectors and A is an n by n unit, or non-unit, upper or lower triangular matrix.

◆ vconcat()

template<typename scalar_t >
DenseMatrix< scalar_t > strumpack::vconcat ( const DenseMatrix< scalar_t > &  a,
const DenseMatrix< scalar_t > &  b 
)

Vertically concatenate 2 DenseMatrix objects a and b: [a; b]. Should have a.cols() == b.cols().

Parameters
a: dense matrix, will be placed on top
b: dense matrix, will be below
Returns
[a; b]

◆ wait_all()

void strumpack::wait_all ( std::vector< MPIRequest > &  reqs)
inline

Wait on all MPIRequests in a vector. Note that the MPI_Requests are not stored contiguously, and hence the implementation of this routine cannot use MPI_Waitall, but must wait on all requests individually.

If you need MPI_Waitall (or MPI_Waitany), for performance reasons, you should use a vector<MPI_Request>.
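
A minimal sketch using the std::vector<MPI_Request> overload, which stores the requests contiguously, as recommended above for performance; the buffers and the number of peers p are placeholders:

    #include <vector>

    void receive_all(int p, std::vector<std::vector<double>>& rbuf, MPI_Comm comm) {
      std::vector<MPI_Request> reqs(p);
      for (int i = 0; i < p; i++)
        MPI_Irecv(rbuf[i].data(), static_cast<int>(rbuf[i].size()),
                  strumpack::mpi_type<double>(), i, 0, comm, &reqs[i]);
      strumpack::wait_all(reqs);
    }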