SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
dsp_blas2_dist.c File Reference

Sparse BLAS 2, using some dense BLAS 2 operations. More...

#include "superlu_ddefs.h"
Include dependency graph for dsp_blas2_dist.c:

Functions

void dusolve (int, int, double *, double *)
 
void dlsolve (int, int, double *, double *)
 
void dmatvec (int, int, int, double *, double *, double *)
 
int sp_dgemv_dist (char *trans, double alpha, SuperMatrix *A, double *x, int incx, double beta, double *y, int incy)
 SpGEMV. More...
 

Detailed Description

Sparse BLAS 2, using some dense BLAS 2 operations.

Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)

All rights reserved.

The source code is distributed under BSD license, see the file License.txt at the top-level directory.

-- Distributed SuperLU routine (version 1.0) --
Lawrence Berkeley National Lab, Univ. of California Berkeley.
September 1, 1999

Function Documentation

◆ dlsolve()

void dlsolve ( int  ldm,
int  ncol,
double *  M,
double *  rhs 
)
Solves a dense UNIT lower triangular system. The unit lower
triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
The solution will be returned in the rhs vector.
Here is the caller graph for this function:

◆ dmatvec()

void dmatvec ( int  ldm,
int  nrow,
int  ncol,
double *  M,
double *  vec,
double *  Mxvec 
)
Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
Here is the caller graph for this function:

◆ dusolve()

void dusolve ( int  ldm,
int  ncol,
double *  M,
double *  rhs 
)
Solves a dense upper triangular system. The upper triangular matrix is
stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
in the rhs vector.
Here is the caller graph for this function:

◆ sp_dgemv_dist()

int sp_dgemv_dist ( char *  trans,
double  alpha,
SuperMatrix A,
double *  x,
int  incx,
double  beta,
double *  y,
int  incy 
)

SpGEMV.

     Purpose
     =======

     sp_dtrsv_dist() solves one of the systems of equations   
         A*x = b,   or   A'*x = b,
     where b and x are n element vectors and A is a sparse unit , or   
     non-unit, upper or lower triangular matrix.   
     No test for singularity or near-singularity is included in this   
     routine. Such tests must be performed before calling this routine.   

     Parameters   
     ==========   

     uplo   - (input) char*
              On entry, uplo specifies whether the matrix is an upper or   
               lower triangular matrix as follows:   
                  uplo = 'U' or 'u'   A is an upper triangular matrix.   
                  uplo = 'L' or 'l'   A is a lower triangular matrix.   

     trans  - (input) char*
               On entry, trans specifies the equations to be solved as   
               follows:   
                  trans = 'N' or 'n'   A*x = b.   
                  trans = 'T' or 't'   A'*x = b.   
                  trans = 'C' or 'c'   A'*x = b.   

     diag   - (input) char*
               On entry, diag specifies whether or not A is unit   
               triangular as follows:   
                  diag = 'U' or 'u'   A is assumed to be unit triangular.   
                  diag = 'N' or 'n'   A is not assumed to be unit   
                                      triangular.   

     L       - (input) SuperMatrix*
           The factor L from the factorization Pr*A*Pc=L*U. Use
               compressed row subscripts storage for supernodes, i.e.,
               L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.

     U       - (input) SuperMatrix*
            The factor U from the factorization Pr*A*Pc=L*U.
            U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.

     x       - (input/output) double*
               Before entry, the incremented array X must contain the n   
               element right-hand side vector b. On exit, X is overwritten 
               with the solution vector x.

     info    - (output) int*
               If *info = -i, the i-th argument had an illegal value.
   
 */
int
sp_dtrsv_dist(char *uplo, char *trans, char *diag, SuperMatrix *L, 
          SuperMatrix *U, double *x, int *info)
{




    SCformat *Lstore;
    NCformat *Ustore;
    double   *Lval, *Uval;
    int incx = 1, incy = 1;
    double alpha = 1.0, beta = 1.0;
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    double *work;
    flops_t solve_ops;
    /*extern SuperLUStat_t SuperLUStat;*/

    /* Test the input parameters */
    *info = 0;
    if ( strncmp(uplo,"L",1) != 0 && strncmp(uplo, "U",1) !=0 ) *info = -1;
    else if ( strncmp(trans, "N",1) !=0 && strncmp(trans, "T", 1) !=0 )
    *info = -2;
    else if ( strncmp(diag, "U", 1) !=0 && strncmp(diag, "N", 1) != 0 )
    *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
    i = -(*info);
    xerr_dist("sp_dtrsv_dist", &i);
    return 0;
    }

    Lstore = (SCformat *) L->Store;
    Lval = (double *) Lstore->nzval;
    Ustore = (NCformat *) U->Store;
    Uval = (double *) Ustore->nzval;
    solve_ops = 0;

    if ( !(work = doubleCalloc_dist(L->nrow)) )
    ABORT("Malloc fails for work in sp_dtrsv_dist().");

    if ( strncmp(trans, "N", 1)==0 ) {  /* Form x := inv(A)*x. */

    if ( strncmp(uplo, "L", 1)==0 ) {
        /* Form x := inv(L)*x */
            if ( L->nrow == 0 ) return 0; /* Quick return */

        for (k = 0; k <= Lstore->nsuper; k++) {
        fsupc = SuperLU_L_FST_SUPC(k);
        istart = SuperLU_L_SUB_START(fsupc);
        nsupr = SuperLU_L_SUB_START(fsupc+1) - istart;
        nsupc = SuperLU_L_FST_SUPC(k+1) - fsupc;
        luptr = SuperLU_L_NZ_START(fsupc);
        nrow = nsupr - nsupc;
            solve_ops += nsupc * (nsupc - 1);
            solve_ops += 2 * nrow * nsupc;
        if ( nsupc == 1 ) {
            for (iptr=istart+1; iptr < SuperLU_L_SUB_START(fsupc+1); ++iptr) {
            irow = SuperLU_L_SUB(iptr);
            ++luptr;
            x[irow] -= x[fsupc] * Lval[luptr];
            }
        } else {


















            dlsolve (nsupr, nsupc, &Lval[luptr], &x[fsupc]);

            dmatvec (nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
            &x[fsupc], &work[0] );


            iptr = istart + nsupc;
            for (i = 0; i < nrow; ++i, ++iptr) {
            irow = SuperLU_L_SUB(iptr);
            x[irow] -= work[i]; /* Scatter */
            work[i] = 0.0;
            }
        }
        } /* for k ... */

    } else {
        /* Form x := inv(U)*x */

        if ( U->nrow == 0 ) return 0; /* Quick return */

        for (k = Lstore->nsuper; k >= 0; k--) {
            fsupc = SuperLU_L_FST_SUPC(k);
            nsupr = SuperLU_L_SUB_START(fsupc+1) - SuperLU_L_SUB_START(fsupc);
            nsupc = SuperLU_L_FST_SUPC(k+1) - fsupc;
            luptr = SuperLU_L_NZ_START(fsupc);
                solve_ops += nsupc * (nsupc + 1);

        if ( nsupc == 1 ) {
            x[fsupc] /= Lval[luptr];
            for (i = SuperLU_U_NZ_START(fsupc); i < SuperLU_U_NZ_START(fsupc+1); ++i) {
            irow = SuperLU_U_SUB(i);
            x[irow] -= x[fsupc] * Uval[i];
            }

        } else {











            dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );


            for (jcol = fsupc; jcol < SuperLU_L_FST_SUPC(k+1); jcol++) {
                solve_ops += 2*(SuperLU_U_NZ_START(jcol+1) - SuperLU_U_NZ_START(jcol));
                for (i = SuperLU_U_NZ_START(jcol); i < SuperLU_U_NZ_START(jcol+1); 
                i++) {
                irow = SuperLU_U_SUB(i);
                x[irow] -= x[jcol] * Uval[i];
                }
                    }
        }
        } /* for k ... */

    }
    } else { /* Form x := inv(A')*x */

    if ( strncmp(uplo, "L", 1)==0 ) {
        /* Form x := inv(L')*x */
            if ( L->nrow == 0 ) return 0; /* Quick return */

        for (k = Lstore->nsuper; k >= 0; --k) {
            fsupc = SuperLU_L_FST_SUPC(k);
            istart = SuperLU_L_SUB_START(fsupc);
            nsupr = SuperLU_L_SUB_START(fsupc+1) - istart;
            nsupc = SuperLU_L_FST_SUPC(k+1) - fsupc;
            luptr = SuperLU_L_NZ_START(fsupc);

        solve_ops += 2 * (nsupr - nsupc) * nsupc;
        for (jcol = fsupc; jcol < SuperLU_L_FST_SUPC(k+1); jcol++) {
            iptr = istart + nsupc;
            for (i = SuperLU_L_NZ_START(jcol) + nsupc; 
                i < SuperLU_L_NZ_START(jcol+1); i++) {
            irow = SuperLU_L_SUB(iptr);
            x[jcol] -= x[irow] * Lval[i];
            iptr++;
            }
        }

        if ( nsupc > 1 ) {
            solve_ops += nsupc * (nsupc - 1);













            dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
            &x[fsupc], &incx);

        }
        }
    } else {
        /* Form x := inv(U')*x */
        if ( U->nrow == 0 ) return 0; /* Quick return */

        for (k = 0; k <= Lstore->nsuper; k++) {
            fsupc = SuperLU_L_FST_SUPC(k);
            nsupr = SuperLU_L_SUB_START(fsupc+1) - SuperLU_L_SUB_START(fsupc);
            nsupc = SuperLU_L_FST_SUPC(k+1) - fsupc;
            luptr = SuperLU_L_NZ_START(fsupc);

        for (jcol = fsupc; jcol < SuperLU_L_FST_SUPC(k+1); jcol++) {
            solve_ops += 2*(SuperLU_U_NZ_START(jcol+1) - SuperLU_U_NZ_START(jcol));
            for (i = SuperLU_U_NZ_START(jcol); i < SuperLU_U_NZ_START(jcol+1); i++) {
            irow = SuperLU_U_SUB(i);
            x[jcol] -= x[irow] * Uval[i];
            }
        }

        solve_ops += nsupc * (nsupc + 1);
        if ( nsupc == 1 ) {
            x[fsupc] /= Lval[luptr];
        } else {












            dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
                &x[fsupc], &incx);

        }
        } /* for k ... */
    }
    }

    /*SuperLUStat.ops[SOLVE] += solve_ops;*/
    SUPERLU_FREE(work);
    return 0;
} /* sp_dtrsv_dist */


/*!
  Purpose   
    =======   

    sp_dgemv_dist()  performs one of the matrix-vector operations   
       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
    where alpha and beta are scalars, x and y are vectors and A is a
    sparse A->nrow by A->ncol matrix.   

    Parameters   
    ==========   

    TRANS  - (input) char*
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

    ALPHA  - (input) double
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
             Currently, the type of A can be:
                 Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. 
             In the future, more general A can be handled.

    X      - (input) double*, array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   

    INCX   - (input) int
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   

    BETA   - (input) double
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   

    Y      - (output) double*,  array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
             updated vector y.

    INCY   - (input) int
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   

    ==== Sparse Level 2 Blas routine.   
Here is the call graph for this function:
Here is the caller graph for this function: