Namespaces | |
namespace | BLR |
namespace | HODLR |
namespace | HSS |
namespace | kernel |
namespace | random |
namespace | structured |
Classes | |
class | BLACSGrid |
This is a small wrapper class around a BLACS grid and a BLACS context. More... | |
class | CompressedSparseMatrix |
Abstract base class for compressed sparse matrix storage. More... | |
class | CSRGraph |
class | CSRMatrix |
Class for storing a compressed sparse row matrix (single node). More... | |
class | CSRMatrixMPI |
Block-row distributed compressed sparse row storage. More... | |
class | DenseMatrix |
This class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines. More... | |
class | DenseMatrixWrapper |
Like DenseMatrix, this class represents a matrix, stored in column major format, to allow direct use of BLAS/LAPACK routines. However, objects of the DenseMatrixWrapper class do not allocate, own or free any data. More... | |
class | DistributedMatrix |
2D block cyclicly distributed matrix, as used by ScaLAPACK. More... | |
class | DistributedMatrixWrapper |
class | EliminationTree |
class | EliminationTreeMPIDist |
class | Equilibration |
class | ExtendAdd |
class | FrontalMatrixBLRMPI |
class | MatchingData |
class | MatrixReordering |
class | MatrixReorderingMPI |
class | MPIComm |
Wrapper class around an MPI_Comm object. More... | |
class | MPIRequest |
Wrapper around an MPI_Request object. More... | |
class | SparseSolver |
SparseSolver is the main sequential or multithreaded sparse solver class. More... | |
class | SparseSolverBase |
SparseSolverBase is the virtual base for both the sequential/multithreaded and distributed sparse solver classes. More... | |
class | SparseSolverMixedPrecision |
SparseSolverMixedPrecision Allows to use lower precision (float) for the factorization/preconditioner, and higher (double) for the outer iterative solver. See also LAPACK's dsgesv. More... | |
class | SparseSolverMPIDist |
This is the fully distributed solver. More... | |
class | SPMVBuffers |
class | SPOptions |
Options for the sparse solver. More... | |
Typedefs | |
template<typename scalar_t , typename integer_t > | |
using | StrumpackSparseSolverBase = SparseSolverBase< scalar_t, integer_t > |
template<typename scalar_t , typename integer_t > | |
using | StrumpackSparseSolver = SparseSolver< scalar_t, integer_t > |
template<typename factor_t , typename refine_t , typename integer_t > | |
using | StrumpackSparseSolverMixedPrecision = SparseSolverMixedPrecision< factor_t, refine_t, integer_t > |
template<typename scalar_t , typename integer_t > | |
using | StrumpackSparseSolverMPIDist = SparseSolverMPIDist< scalar_t, integer_t > |
Enumerations | |
enum class | ProportionalMapping { FLOPS , FACTOR_MEMORY , PEAK_MEMORY } |
enum class | ReorderingStrategy { NATURAL , METIS , PARMETIS , SCOTCH , PTSCOTCH , RCM , GEOMETRIC , AMD , MMD , AND , MLF , SPECTRAL } |
enum class | CompressionType { NONE , HSS , BLR , HODLR , BLR_HODLR , ZFP_BLR_HODLR , LOSSLESS , LOSSY } |
enum class | MatchingJob { NONE , MAX_CARDINALITY , MAX_SMALLEST_DIAGONAL , MAX_SMALLEST_DIAGONAL_2 , MAX_DIAGONAL_SUM , MAX_DIAGONAL_PRODUCT_SCALING , COMBBLAS } |
enum class | EquilibrationType : char { NONE ='N' , ROW ='R' , COLUMN ='C' , BOTH ='B' } |
enum class | GramSchmidtType { CLASSICAL , MODIFIED } |
enum class | KrylovSolver { AUTO , DIRECT , REFINE , PREC_GMRES , GMRES , PREC_BICGSTAB , BICGSTAB } |
enum class | ReturnCode { SUCCESS , MATRIX_NOT_SET , REORDERING_ERROR , ZERO_PIVOT , NO_CONVERGENCE , INACCURATE_INERTIA } |
Enumeration for the possible return codes. More... | |
enum class | Trans : char { N ='N' , C ='C' , T ='T' } |
enum class | Side : char { L ='L' , R ='R' } |
enum class | UpLo : char { U ='U' , L ='L' } |
enum class | Diag : char { U ='U' , N ='N' } |
enum class | Jobz : char { N ='N' , V ='V' } |
enum class | ClusteringAlgorithm { NATURAL , TWO_MEANS , KD_TREE , PCA , COBBLE } |
Functions | |
std::string | get_name (ReorderingStrategy method) |
bool | is_parallel (ReorderingStrategy method) |
std::string | get_name (CompressionType comp) |
MatchingJob | get_matching (int job) |
int | get_matching (MatchingJob job) |
std::string | get_description (MatchingJob job) |
template<typename real_t > | |
real_t | default_rel_tol () |
template<typename real_t > | |
real_t | default_abs_tol () |
template<> | |
float | default_rel_tol () |
template<> | |
float | default_abs_tol () |
int | default_gpu_streams () |
std::ostream & | operator<< (std::ostream &os, ReturnCode &e) |
template<typename scalar_t , typename integer_t , typename cast_t > | |
CSRMatrix< cast_t, integer_t > | cast_matrix (const CSRMatrix< scalar_t, integer_t > &mat) |
template<typename scalar_t , typename integer_t , typename cast_t > | |
CSRMatrixMPI< cast_t, integer_t > | cast_matrix (const CSRMatrixMPI< scalar_t, integer_t > &mat) |
Trans | c2T (char op) |
template<typename scalar_t > | |
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > | ConstDenseMatrixWrapperPtr (std::size_t m, std::size_t n, const scalar_t *D, std::size_t ld) |
template<typename scalar_t > | |
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > | ConstDenseMatrixWrapperPtr (std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &D, std::size_t i, std::size_t j) |
template<typename scalar_from_t , typename scalar_to_t > | |
void | copy (std::size_t m, std::size_t n, const DenseMatrix< scalar_from_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_to_t > &b, std::size_t ib, std::size_t jb) |
template<typename scalar_from_t , typename scalar_to_t > | |
void | copy (const DenseMatrix< scalar_from_t > &a, DenseMatrix< scalar_to_t > &b, std::size_t ib=0, std::size_t jb=0) |
template<typename scalar_t > | |
void | copy (const DenseMatrix< scalar_t > &a, scalar_t *b, std::size_t ldb) |
template<typename scalar_t > | |
DenseMatrix< scalar_t > | vconcat (const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b) |
template<typename scalar_t > | |
DenseMatrix< scalar_t > | hconcat (const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b) |
template<typename scalar_t > | |
DenseMatrix< scalar_t > | eye (std::size_t m, std::size_t n) |
template<typename scalar_t > | |
void | gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0) |
template<typename scalar_t > | |
void | gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *b, int ldb, scalar_t beta, DenseMatrix< scalar_t > &c, int depth=0) |
template<typename scalar_t > | |
void | gemm (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta, scalar_t *c, int ldc, int depth=0) |
template<typename scalar_t > | |
void | trmm (Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0) |
template<typename scalar_t > | |
void | trsm (Side s, UpLo ul, Trans ta, Diag d, scalar_t alpha, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0) |
template<typename scalar_t > | |
void | trsv (UpLo ul, Trans ta, Diag d, const DenseMatrix< scalar_t > &a, DenseMatrix< scalar_t > &b, int depth=0) |
template<typename scalar_t > | |
void | gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0) |
template<typename scalar_t > | |
void | gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *x, int incx, scalar_t beta, DenseMatrix< scalar_t > &y, int depth=0) |
template<typename scalar_t > | |
void | gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &x, scalar_t beta, scalar_t *y, int incy, int depth=0) |
template<typename scalar_t > | |
void | gemv (Trans ta, scalar_t alpha, const DenseMatrix< scalar_t > &a, const scalar_t *x, int incx, scalar_t beta, scalar_t *y, int incy, int depth=0) |
template<typename scalar_t > | |
long long int | LU_flops (const DenseMatrix< scalar_t > &a) |
template<typename scalar_t > | |
long long int | solve_flops (const DenseMatrix< scalar_t > &b) |
template<typename scalar_t > | |
long long int | LQ_flops (const DenseMatrix< scalar_t > &a) |
template<typename scalar_t > | |
long long int | ID_row_flops (const DenseMatrix< scalar_t > &a, int rank) |
template<typename scalar_t > | |
long long int | trsm_flops (Side s, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b) |
template<typename scalar_t > | |
long long int | gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, const DenseMatrix< scalar_t > &b, scalar_t beta) |
template<typename scalar_t > | |
long long int | gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DenseMatrix< scalar_t > &a, scalar_t beta, const DenseMatrix< scalar_t > &c) |
template<typename scalar_t > | |
long long int | orthogonalize_flops (const DenseMatrix< scalar_t > &a) |
template<typename scalar_t , typename cast_t > | |
DenseMatrix< cast_t > | cast_matrix (const DenseMatrix< scalar_t > &mat) |
template<typename scalar_t > | |
void | copy (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &a, std::size_t ia, std::size_t ja, DenseMatrix< scalar_t > &b, int dest, int context_all) |
template<typename scalar_t > | |
void | copy (std::size_t m, std::size_t n, const DenseMatrix< scalar_t > &a, int src, DistributedMatrix< scalar_t > &b, std::size_t ib, std::size_t jb, int context_all) |
template<typename scalar_t > | |
void | copy (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &a, std::size_t ia, std::size_t ja, DistributedMatrix< scalar_t > &b, std::size_t ib, std::size_t jb, int context_all) |
template<typename scalar_t > | |
long long int | LU_flops (const DistributedMatrix< scalar_t > &a) |
template<typename scalar_t > | |
long long int | solve_flops (const DistributedMatrix< scalar_t > &b) |
template<typename scalar_t > | |
long long int | LQ_flops (const DistributedMatrix< scalar_t > &a) |
template<typename scalar_t > | |
long long int | ID_row_flops (const DistributedMatrix< scalar_t > &a, int rank) |
template<typename scalar_t > | |
long long int | trsm_flops (Side s, scalar_t alpha, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b) |
template<typename scalar_t > | |
long long int | gemm_flops (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, scalar_t beta) |
template<typename scalar_t > | |
long long int | gemv_flops (Trans ta, const DistributedMatrix< scalar_t > &a, scalar_t alpha, scalar_t beta) |
template<typename scalar_t > | |
long long int | orthogonalize_flops (const DistributedMatrix< scalar_t > &a) |
template<typename scalar_t > | |
std::unique_ptr< const DistributedMatrixWrapper< scalar_t > > | ConstDistributedMatrixWrapperPtr (std::size_t m, std::size_t n, const DistributedMatrix< scalar_t > &D, std::size_t i, std::size_t j) |
template<typename scalar_t > | |
void | gemm (Trans ta, Trans tb, scalar_t alpha, const DistributedMatrix< scalar_t > &A, const DistributedMatrix< scalar_t > &B, scalar_t beta, DistributedMatrix< scalar_t > &C) |
template<typename scalar_t > | |
void | trsm (Side s, UpLo u, Trans ta, Diag d, scalar_t alpha, const DistributedMatrix< scalar_t > &A, DistributedMatrix< scalar_t > &B) |
template<typename scalar_t > | |
void | trsv (UpLo ul, Trans ta, Diag d, const DistributedMatrix< scalar_t > &A, DistributedMatrix< scalar_t > &B) |
template<typename scalar_t > | |
void | gemv (Trans ta, scalar_t alpha, const DistributedMatrix< scalar_t > &A, const DistributedMatrix< scalar_t > &X, scalar_t beta, DistributedMatrix< scalar_t > &Y) |
template<typename scalar_t > | |
DistributedMatrix< scalar_t > | vconcat (int cols, int arows, int brows, const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, const BLACSGrid *gnew, int cxt_all) |
template<typename scalar_t > | |
void | subgrid_copy_to_buffers (const DistributedMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, int p0, int npr, int npc, std::vector< std::vector< scalar_t > > &sbuf) |
template<typename scalar_t > | |
void | subproc_copy_to_buffers (const DenseMatrix< scalar_t > &a, const DistributedMatrix< scalar_t > &b, int p0, int npr, int npc, std::vector< std::vector< scalar_t > > &sbuf) |
template<typename scalar_t > | |
void | subgrid_add_from_buffers (const BLACSGrid *subg, int master, DistributedMatrix< scalar_t > &b, std::vector< scalar_t * > &pbuf) |
std::ostream & | operator<< (std::ostream &os, const BLACSGrid *g) |
template<typename T > | |
MPI_Datatype | mpi_type () |
template<> | |
MPI_Datatype | mpi_type< char > () |
template<> | |
MPI_Datatype | mpi_type< bool > () |
template<> | |
MPI_Datatype | mpi_type< int > () |
template<> | |
MPI_Datatype | mpi_type< long > () |
template<> | |
MPI_Datatype | mpi_type< unsigned long > () |
template<> | |
MPI_Datatype | mpi_type< long long int > () |
template<> | |
MPI_Datatype | mpi_type< float > () |
template<> | |
MPI_Datatype | mpi_type< double > () |
template<> | |
MPI_Datatype | mpi_type< std::complex< float > > () |
template<> | |
MPI_Datatype | mpi_type< std::complex< double > > () |
template<> | |
MPI_Datatype | mpi_type< std::pair< int, int > > () |
template<> | |
MPI_Datatype | mpi_type< std::pair< long int, long int > > () |
template<> | |
MPI_Datatype | mpi_type< std::pair< long long int, long long int > > () |
void | wait_all (std::vector< MPIRequest > &reqs) |
void | wait_all (std::vector< MPI_Request > &reqs) |
int | mpi_rank (MPI_Comm c=MPI_COMM_WORLD) |
int | mpi_nprocs (MPI_Comm c=MPI_COMM_WORLD) |
std::string | get_name (ClusteringAlgorithm c) |
ClusteringAlgorithm | get_clustering_algorithm (const std::string &c) |
template<typename T > | |
void | pca_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, int *perm) |
template<typename T > | |
structured::ClusterTree | recursive_pca (DenseMatrix< T > &p, std::size_t cluster_size, int *perm) |
template<typename T > | |
void | cobble_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, int *perm) |
template<typename T > | |
structured::ClusterTree | recursive_cobble (DenseMatrix< T > &p, std::size_t cluster_size, int *perm) |
template<typename T > | |
structured::ClusterTree | recursive_2_means (DenseMatrix< T > &p, std::size_t cluster_size, int *perm, std::mt19937 &generator) |
template<typename T > | |
void | kd_partition (DenseMatrix< T > &p, std::vector< std::size_t > &nc, std::size_t cluster_size, int *perm) |
template<typename T > | |
structured::ClusterTree | recursive_kd (DenseMatrix< T > &p, std::size_t cluster_size, int *perm) |
template<typename scalar_t > | |
structured::ClusterTree | binary_tree_clustering (ClusteringAlgorithm algo, DenseMatrix< scalar_t > &p, std::vector< int > &perm, std::size_t cluster_size) |
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type> | |
real_t | Euclidean_distance_squared (std::size_t d, const scalar_t *x, const scalar_t *y) |
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type> | |
real_t | Euclidean_distance (std::size_t d, const scalar_t *x, const scalar_t *y) |
template<typename scalar_t , typename real_t = typename RealType<scalar_t>::value_type> | |
real_t | norm1_distance (std::size_t d, const scalar_t *x, const scalar_t *y) |
All of STRUMPACK is contained in the strumpack namespace.
|
strong |
|
strong |
Enumeration of rank-structured data formats, which can be used for compression within the sparse solver.
Enumerator | |
---|---|
NONE | No compression, purely direct solver |
HSS | HSS compression of frontal matrices |
BLR | Block low-rank compression of fronts |
HODLR | Hierarchically Off-diagonal Low-Rank compression of frontal matrices |
BLR_HODLR | Block low-rank compression of medium fronts and Hierarchically Off-diagonal Low-Rank compression of large fronts |
ZFP_BLR_HODLR | ZFP compression for small fronts, Block low-rank compression of medium fronts and Hierarchically Off-diagonal Low-Rank compression of large fronts |
LOSSLESS | Lossless cmpresssion |
LOSSY | Lossy cmpresssion |
|
strong |
|
strong |
|
strong |
|
strong |
Type of outer iterative (Krylov) solver.
|
strong |
Enumeration of possible matching algorithms, used for permutation of the sparse matrix to improve stability.
|
strong |
|
strong |
Enumeration of possible sparse fill-reducing orderings.
|
strong |
Enumeration for the possible return codes.
|
strong |
|
strong |
|
strong |
structured::ClusterTree strumpack::binary_tree_clustering | ( | ClusteringAlgorithm | algo, |
DenseMatrix< scalar_t > & | p, | ||
std::vector< int > & | perm, | ||
std::size_t | cluster_size | ||
) |
Reorder the input data and define a (binary) cluster tree.
algo | ClusteringAlgorithm to use |
p | Input data set. This is a dxn matrix (column major). d is the number of features, n is the number of datapoints. Hence the data is stored point after point (column major). This will be reordered according to perm. |
perm | The permutation. The permutation uses 1-based indexing, so it can be used with lapack permutation routines (such as DenseMatrix::lapmt). This will be resized to the correct size, ie., n == p.cols(). |
cluster_size | Stop partitioning when this cluster_size is reached. This corresponds to the HSS/HODLR leaf size. |
CSRMatrix< cast_t, integer_t > strumpack::cast_matrix | ( | const CSRMatrix< scalar_t, integer_t > & | mat | ) |
Creates a copy of a matrix templated on cast_t and integer_t. Original matrix is unmodified.
scalar_t | value type of original matrix |
integer_t | integer type of original matrix |
cast_t | value type of returned matrix |
mat | const CSRMatrix<scalar_t,integer_t>&, const ref. of input matrix. |
CSRMatrixMPI< cast_t, integer_t > strumpack::cast_matrix | ( | const CSRMatrixMPI< scalar_t, integer_t > & | mat | ) |
Creates a copy of a matrix templated on cast_t and integer_t. Original matrix is unmodified.
scalar_t | value type of original matrix |
integer_t | integer type of original matrix |
cast_t | value type of returned matrix |
mat | const CSRMatrix<scalar_t,integer_t>&, const ref. of input matrix. |
DenseMatrix< cast_t > strumpack::cast_matrix | ( | const DenseMatrix< scalar_t > & | mat | ) |
Creates a copy of a matrix templated on cast_t. Original matrix is unmodified.
scalar_t | value type of original matrix |
cast_t | value type of returned matrix |
mat | const DenseMatrix<scalar_t>&, const ref. of input matrix. |
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > strumpack::ConstDenseMatrixWrapperPtr | ( | std::size_t | m, |
std::size_t | n, | ||
const DenseMatrix< scalar_t > & | D, | ||
std::size_t | i, | ||
std::size_t | j | ||
) |
Create a DenseMatrixWrapper for a const dense matrix. TODO: we need to find a better way to handle this. Should have i+m <= D.rows() and j+n <= D.cols().
m | number of rows of the submatrix (the created wrapper), the view in the matrix D |
n | number of columns of the submatrix |
D | pointer to a dense matrix. This will not be modified, will not be freed. |
ld | leading dimension of D |
i | row offset in the matrix D, denoting the top left corner of the submatrix to be created |
j | column offset in the matrix D, denoting the top left corner of the submatrix to be created |
std::unique_ptr< const DenseMatrixWrapper< scalar_t > > strumpack::ConstDenseMatrixWrapperPtr | ( | std::size_t | m, |
std::size_t | n, | ||
const scalar_t * | D, | ||
std::size_t | ld | ||
) |
Create a DenseMatrixWrapper for a const dense matrix. TODO: we need to find a better way to handle this.
m | number of rows of the submatrix (the created wrapper), the view in the matrix D |
n | number of columns of the submatrix |
D | pointer to a dense matrix. This will not be modified, will not be freed. |
ld | leading dimension of D, ld >= m |
void strumpack::copy | ( | const DenseMatrix< scalar_from_t > & | a, |
DenseMatrix< scalar_to_t > & | b, | ||
std::size_t | ib = 0 , |
||
std::size_t | jb = 0 |
||
) |
Copy matrix a into matrix b at position ib, jb. Should have ib+a.rows() <= b.row() and jb+a.cols() <= b.cols().
a | matrix to copy |
b | matrix to copy to |
ib | row offset of top left corner of place in b to copy to |
jb | column offset of top left corner of place in b to copy to |
void strumpack::copy | ( | const DenseMatrix< scalar_t > & | a, |
scalar_t * | b, | ||
std::size_t | ldb | ||
) |
Copy matrix a into matrix b. Should have ldb >= a.rows(). Matrix b should have been allocated.
a | matrix to copy |
b | dense matrix to copy to |
ldb | leading dimension of b |
void strumpack::copy | ( | std::size_t | m, |
std::size_t | n, | ||
const DenseMatrix< scalar_from_t > & | a, | ||
std::size_t | ia, | ||
std::size_t | ja, | ||
DenseMatrix< scalar_to_t > & | b, | ||
std::size_t | ib, | ||
std::size_t | jb | ||
) |
Copy submatrix of a at ia,ja of size m,n into b at position ib,jb. Should have ia+m <= a.rows(), ja+n <= a.cols(), ib+m <= b.rows() and jb.n <= b.cols().
m | number of rows to copy |
n | number of columns to copy |
a | DenseMatrix to copy from |
ia | row offset of top left corner of submatrix of a to copy |
ja | column offset of top left corner of submatrix of a to copy |
b | matrix to copy to |
ib | row offset of top left corner of place in b to copy to |
jb | column offset of top left corner of place in b to copy to |
void strumpack::copy | ( | std::size_t | m, |
std::size_t | n, | ||
const DistributedMatrix< scalar_t > & | a, | ||
std::size_t | ia, | ||
std::size_t | ja, | ||
DenseMatrix< scalar_t > & | b, | ||
int | dest, | ||
int | context_all | ||
) |
copy submatrix of a DistM_t at ia,ja of size m,n into a DenseM_t b at proc dest
void strumpack::copy | ( | std::size_t | m, |
std::size_t | n, | ||
const DistributedMatrix< scalar_t > & | a, | ||
std::size_t | ia, | ||
std::size_t | ja, | ||
DistributedMatrix< scalar_t > & | b, | ||
std::size_t | ib, | ||
std::size_t | jb, | ||
int | context_all | ||
) |
copy submatrix of a at ia,ja of size m,n into b at position ib,jb
|
inline |
Default absolute tolerance used when solving a linear system. For iterative solvers such as GMRES and BiCGStab, this is the residual tolerance. Exact value depends on the floating point type.
|
inline |
Default relative tolerance used when solving a linear system. For iterative solvers such as GMRES and BiCGStab, this is the relative residual tolerance. Exact value depends on the floating point type.
real_t strumpack::Euclidean_distance | ( | std::size_t | d, |
const scalar_t * | x, | ||
const scalar_t * | y | ||
) |
Evaluate the Euclidean distance between two points x and y.
scalar_t | datatype of the points |
real_t | real type corresponding to scalar_t |
d | dimension of the points |
x | pointer to first point (stored with stride 1) |
y | pointer to second point (stored with stride 1) |
real_t strumpack::Euclidean_distance_squared | ( | std::size_t | d, |
const scalar_t * | x, | ||
const scalar_t * | y | ||
) |
Evaluate the Euclidean distance squared between two points x and y.
scalar_t | datatype of the points |
real_t | real type corresponding to scalar_t |
d | dimension of the points |
x | pointer to first point (stored with stride 1) |
y | pointer to second point (stored with stride 1) |
DenseMatrix< scalar_t > strumpack::eye | ( | std::size_t | m, |
std::size_t | n | ||
) |
Create an identity matrix of size m x n, ie, 1 on the main diagonal, zero everywhere else.
void strumpack::gemm | ( | Trans | ta, |
Trans | tb, | ||
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const DenseMatrix< scalar_t > & | b, | ||
scalar_t | beta, | ||
DenseMatrix< scalar_t > & | c, | ||
int | depth = 0 |
||
) |
GEMM, defined for DenseMatrix objects (or DenseMatrixWrapper).
DGEMM performs one of the matrix-matrix operations
C := alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T,
alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
depth | current OpenMP task recursion depth |
long long int strumpack::gemm_flops | ( | Trans | ta, |
Trans | tb, | ||
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const DenseMatrix< scalar_t > & | b, | ||
scalar_t | beta | ||
) |
return number of flops for a gemm, given a and b
long long int strumpack::gemm_flops | ( | Trans | ta, |
Trans | tb, | ||
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
scalar_t | beta, | ||
const DenseMatrix< scalar_t > & | c | ||
) |
return number of flops for a gemm, given a and c
void strumpack::gemv | ( | Trans | ta, |
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const DenseMatrix< scalar_t > & | x, | ||
scalar_t | beta, | ||
DenseMatrix< scalar_t > & | y, | ||
int | depth = 0 |
||
) |
DGEMV performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
void strumpack::gemv | ( | Trans | ta, |
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const DenseMatrix< scalar_t > & | x, | ||
scalar_t | beta, | ||
scalar_t * | y, | ||
int | incy, | ||
int | depth = 0 |
||
) |
DGEMV performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
void strumpack::gemv | ( | Trans | ta, |
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const scalar_t * | x, | ||
int | incx, | ||
scalar_t | beta, | ||
DenseMatrix< scalar_t > & | y, | ||
int | depth = 0 |
||
) |
DGEMV performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
void strumpack::gemv | ( | Trans | ta, |
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const scalar_t * | x, | ||
int | incx, | ||
scalar_t | beta, | ||
scalar_t * | y, | ||
int | incy, | ||
int | depth = 0 |
||
) |
DGEMV performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an m by n matrix.
|
inline |
Return a ClusteringAlgorithm enum based on the input string.
c | String, possible values are 'natural', '2means', 'kdtree', 'pca' and 'cobble'. This is case sensitive. |
std::string strumpack::get_description | ( | MatchingJob | job | ) |
Return a string describing the matching algorithm.
MatchingJob strumpack::get_matching | ( | int | job | ) |
Convert a job number to a MatchingJob enum type.
int strumpack::get_matching | ( | MatchingJob | job | ) |
Convert a MatchingJob enum type to a job number. Prefer to use the MachingJob enum instead of the job number.
|
inline |
Return a short string with the name of the clustering algorithm.
c | Clustering algorithm. |
std::string strumpack::get_name | ( | CompressionType | comp | ) |
Return a name/string for the CompressionType.
std::string strumpack::get_name | ( | ReorderingStrategy | method | ) |
Return a string with the name of the reordering method.
DenseMatrix< scalar_t > strumpack::hconcat | ( | const DenseMatrix< scalar_t > & | a, |
const DenseMatrix< scalar_t > & | b | ||
) |
Horizontally concatenate 2 DenseMatrix objects a and b: [a; b]. Should have a.rows() == b.rows()
a | dense matrix, will be placed left |
b | dense matrix, will be placed right |
long long int strumpack::ID_row_flops | ( | const DenseMatrix< scalar_t > & | a, |
int | rank | ||
) |
return number of flops for interpolative decomposition
bool strumpack::is_parallel | ( | ReorderingStrategy | method | ) |
Check whether or not the reordering needs to be run in parallel.
long long int strumpack::LQ_flops | ( | const DenseMatrix< scalar_t > & | a | ) |
return number of flops for LQ factorization
long long int strumpack::LU_flops | ( | const DenseMatrix< scalar_t > & | a | ) |
return number of flops for LU factorization
|
inline |
Return the number of processes in MPI communicator c, or in MPI_COMM_WORLD if c is not provided.
This routine is deprecated, will be removed soon. USe the MPIComm interface instead.
|
inline |
Return this process rank in MPI communicator c, or in MPI_COMM_WORLD if c is not provided.
This routine is deprecated, will be removed soon. USe the MPIComm interface instead.
MPI_Datatype strumpack::mpi_type | ( | ) |
Return the corresponding MPI_Datatype, for simple C++ data types.
T | C++ type for which to return the corresponding MPI_Datatype |
|
inline |
return MPI datatype for C++ bool
|
inline |
return MPI datatype for C++ char
|
inline |
return MPI datatype for C++ double
|
inline |
return MPI datatype for C++ float
|
inline |
return MPI datatype for C++ int
|
inline |
return MPI datatype for C++ long
|
inline |
return MPI datatype for C++ long long int
|
inline |
return MPI datatype for C++ std::complex<double>
|
inline |
return MPI datatype for C++ std::complex<float>
|
inline |
return MPI datatype for C++ std::pair<int,int>
|
inline |
return MPI datatype for C++ std::pair<long int,long int>
|
inline |
return MPI datatype for C++ std::pair<long long int,long long int>
|
inline |
return MPI datatype for C++ unsigned long
real_t strumpack::norm1_distance | ( | std::size_t | d, |
const scalar_t * | x, | ||
const scalar_t * | y | ||
) |
Evaluate the 1-norm distance between two points x and y.
scalar_t | datatype of the points |
real_t | real type corresponding to scalar_t |
d | dimension of the points |
x | pointer to first point (stored with stride 1) |
x | pointer to second point (stored with stride 1) |
|
inline |
Print some info about the BLACS grid to stream os. Just used for debugging.
long long int strumpack::orthogonalize_flops | ( | const DenseMatrix< scalar_t > & | a | ) |
return number of flops for orthogonalization
long long int strumpack::solve_flops | ( | const DenseMatrix< scalar_t > & | b | ) |
return number of flops for solve, using LU factorization
void strumpack::trmm | ( | Side | s, |
UpLo | ul, | ||
Trans | ta, | ||
Diag | d, | ||
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
DenseMatrix< scalar_t > & | b, | ||
int | depth = 0 |
||
) |
TRMM performs one of the matrix-matrix operations
B := alpha*op(A)*B, or B := alpha*B*op(A),
where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A**T.
void strumpack::trsm | ( | Side | s, |
UpLo | ul, | ||
Trans | ta, | ||
Diag | d, | ||
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
DenseMatrix< scalar_t > & | b, | ||
int | depth = 0 |
||
) |
DTRSM solves one of the matrix equations
op( A )*X = alpha*B, or X*op( A ) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of
op( A ) = A or op( A ) = A**T.
The matrix X is overwritten on B.
long long int strumpack::trsm_flops | ( | Side | s, |
scalar_t | alpha, | ||
const DenseMatrix< scalar_t > & | a, | ||
const DenseMatrix< scalar_t > & | b | ||
) |
return number of flops for a trsm
void strumpack::trsv | ( | UpLo | ul, |
Trans | ta, | ||
Diag | d, | ||
const DenseMatrix< scalar_t > & | a, | ||
DenseMatrix< scalar_t > & | b, | ||
int | depth = 0 |
||
) |
DTRSV solves one of the systems of equations
A*x = b, or A**T*x = b,
where b and x are n element vectors and A is an n by n unit, or non-unit, upper or lower triangular matrix.
DenseMatrix< scalar_t > strumpack::vconcat | ( | const DenseMatrix< scalar_t > & | a, |
const DenseMatrix< scalar_t > & | b | ||
) |
Vertically concatenate 2 DenseMatrix objects a and b: [a; b]. Should have a.cols() == b.cols()
a | dense matrix, will be placed on top |
b | dense matrix, will be below |
|
inline |
Wait on all MPIRequests in a vector. Note that the MPI_Requests are not stored contiguously, and hence the implementation of this routine cannot use MPI_Waitall, but must wait on all requests individually.
If you need MPI_Waitall (or MPI_Waitany), for performance reasons, you should use a vector<MPI_Request>.