SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
superlu_defs.h
Go to the documentation of this file.
1
33#ifndef __SUPERLU_DEFS /* allow multiple inclusions */
34#define __SUPERLU_DEFS
35
36/*
37 * File name: superlu_defs.h
38 * Purpose: Definitions which are precision-neutral
39 */
40#ifdef _CRAY
41 #include <fortran.h>
42#endif
43
44#ifdef _OPENMP
45 #include <omp.h>
46#endif
47
48
49#include <mpi.h>
50#include <stdlib.h>
51#include <stdio.h>
52#include <limits.h>
53#include <string.h>
54#include <ctype.h>
55// #include <stdatomic.h>
56#include <math.h>
57#include <stdint.h>
58//#include <malloc.h> Sherry: not available on Mac OS
59// /* Following is for vtune */
60// #if 0
61// #include <ittnotify.h>
62// #define USE_VTUNE
63// #endif
64#if ( VTUNE>=1 )
65#include <ittnotify.h>
66#endif
67
68/*************************************************************************
69 * Constants
70 **************************************************************************/
71/*
72 * You can support older version of SuperLU_DIST.
73 * At compile-time, you can catch the new release as:
74 * #if SUPERLU_DIST_MAJOR_VERSION == 5
75 * use the new interface
76 * #else
77 * use the old interface
78 * #endif
79 * Versions 4.x and earlier do not include a #define'd version numbers.
80 */
81#define SUPERLU_DIST_MAJOR_VERSION 7
82#define SUPERLU_DIST_MINOR_VERSION 2
83#define SUPERLU_DIST_PATCH_VERSION 0
84#define SUPERLU_DIST_RELEASE_DATE "December 12, 2021"
85
86#include "superlu_dist_config.h"
87
88#ifdef HAVE_CUDA
89#define GPU_ACC
90//#include "cublas_utils.h"
91#endif
92
93#ifdef HAVE_HIP
94#ifndef GPU_ACC
95#define GPU_ACC
96#endif
97#endif
98
99#ifdef GPU_ACC
100#include "gpu_api_utils.h"
101#endif
102
103
104/* Define my integer size int_t */
105#ifdef _CRAY
106 typedef short int_t;
107 /*#undef int Revert back to int of default size. */
108 #define mpi_int_t MPI_SHORT
109#elif defined (_LONGINT)
110 typedef int64_t int_t;
111 #define mpi_int_t MPI_LONG_LONG_INT
112 #define IFMT "%lld"
113#else /* Default */
114 typedef int int_t;
115 #define mpi_int_t MPI_INT
116 #define IFMT "%8d"
117#endif
118
119
120/* MPI C complex datatype */
121#define SuperLU_MPI_COMPLEX MPI_C_COMPLEX
122#define SuperLU_MPI_DOUBLE_COMPLEX MPI_C_DOUBLE_COMPLEX
123
124/* MPI_Datatype cannot be used in C typedef
125typedef MPI_C_COMPLEX SuperLU_MPI_COMPLEX;
126typedef MPI_C_DOUBLE_COMPLEX SuperLU_MPI_DOUBLE_COMPLEX;
127*/
128
130#include "superlu_FCnames.h"
131#include "superlu_enum_consts.h"
132#include "supermatrix.h"
133#include "util_dist.h"
134#include "psymbfact.h"
135
136
137#define MAX_SUPER_SIZE 512 /* Sherry: moved from superlu_gpu.cu */
138
139
140#define ISORT /* NOTE: qsort() has bug on Mac */
141
142/***********************************************************************
143 * Constants
144 ***********************************************************************/
145/*
146 * For each block column of L, the index[] array contains both the row
147 * subscripts and the integers describing the size of the blocks.
148 * The organization of index[] looks like:
149 *
150 * [ BLOCK COLUMN HEADER (size BC_HEADER)
151 * number of blocks
152 * number of row subscripts, i.e., LDA of nzval[]
153 * BLOCK 0 <----
154 * BLOCK DESCRIPTOR (of size LB_DESCRIPTOR) |
155 * block number (global) |
156 * number of full rows in the block |
157 * actual row subscripts |
158 * BLOCK 1 | Repeat ...
159 * BLOCK DESCRIPTOR | number of blocks
160 * block number (global) |
161 * number of full rows in the block |
162 * actual row subscripts |
163 * . |
164 * . |
165 * . <----
166 * ]
167 *
168 * For each block row of U, the organization of index[] looks like:
169 *
170 * [ BLOCK ROW HEADER (of size BR_HEADER)
171 * number of blocks
172 * number of entries in nzval[]
173 * number of entries in index[]
174 * BLOCK 0 <----
175 * BLOCK DESCRIPTOR (of size UB_DESCRIPTOR) |
176 * block number (global) |
177 * number of nonzeros in the block |
178 * actual fstnz subscripts |
179 * BLOCK 1 | Repeat ...
180 * BLOCK DESCRIPTOR | number of blocks
181 * block number (global) |
182 * number of nonzeros in the block |
183 * actual fstnz subscripts |
184 * . |
185 * . |
186 * . <----
187 * ]
188 *
189 */
190#define BC_HEADER 2
191#define LB_DESCRIPTOR 2
192#define BR_HEADER 3
193#define UB_DESCRIPTOR 2
194#define NBUFFERS 5
195
196/*
197 * Communication tags
198 */
199/* Return the mpi_tag assuming 5 pairs of communications and MPI_TAG_UB >= 5 *
200 * for each supernodal column "num", the five communications are: *
201 * 0,1: for sending L to "right" *
202 * 2,3: for sending off-diagonal blocks of U "down" *
203 * 4 : for sending the diagonal block down (in pxgstrf2) */
204//#define SLU_MPI_TAG(id,num) ( (5*(num)+id) % tag_ub )
205
206 /* For numeric factorization. */
207#if 0
208#define NTAGS 10000
209#else
210#define NTAGS INT_MAX
211#endif
212#define UjROW 10
213#define UkSUB 11
214#define UkVAL 12
215#define LkSUB 13
216#define LkVAL 14
217#define LkkDIAG 15
218 /* For triangular solves. */
219#define XK_H 2 /* The header preceding each X block. */
220#define LSUM_H 2 /* The header preceding each MOD block. */
221#define GSUM 20
222#define Xk 21
223#define Yk 22
224#define LSUM 23
225
226
227static const int BC_L=1; /* MPI tag for x in L-solve*/
228static const int RD_L=2; /* MPI tag for lsum in L-solve*/
229static const int BC_U=3; /* MPI tag for x in U-solve*/
230static const int RD_U=4; /* MPI tag for lsum in U-solve*/
231
232/*
233 * Communication scopes
234 */
235#define COMM_ALL 100
236#define COMM_COLUMN 101
237#define COMM_ROW 102
238
239/*
240 * Matrix distribution for sparse matrix-vector multiplication
241 */
242#define SUPER_LINEAR 11
243#define SUPER_BLOCK 12
244
245/*
246 * No of marker arrays used in the symbolic factorization, each of size n
247 */
248#define NO_MARKER 3
249
250
251
252/***********************************************************************
253 * Macros
254 ***********************************************************************/
/*
 * NOTE(review): the expansion of IAM is neither a valid C expression nor a
 * valid statement (`rank}` has no terminating ';'), so any use of it will not
 * compile.  It is kept unchanged here for compatibility.
 */
#define IAM(comm)    { int rank; MPI_Comm_rank ( comm, &rank ); rank};

/*
 * Process-grid and block-mapping helpers.
 * `grid` is a pointer to a structure with `nprow` and `npcol` members.
 * BlockNum/FstBlockC/SuperSize read the arrays `supno` / `xsup` that must be
 * in scope at the point of use.
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. `&g`, `a ? b : c`, `k | 1`) expand correctly.
 */
#define MYROW(iam,grid) ( (iam) / (grid)->npcol )
#define MYCOL(iam,grid) ( (iam) % (grid)->npcol )
#define BlockNum(i)     ( supno[(i)] )
#define FstBlockC(bnum) ( xsup[(bnum)] )
#define SuperSize(bnum) ( xsup[(bnum)+1]-xsup[(bnum)] )
#define LBi(bnum,grid)  ( (bnum)/(grid)->nprow )/* Global to local block rowwise */
#define LBj(bnum,grid)  ( (bnum)/(grid)->npcol )/* Global to local block columnwise*/
#define PROW(bnum,grid) ( (bnum) % (grid)->nprow )
#define PCOL(bnum,grid) ( (bnum) % (grid)->npcol )
#define PNUM(i,j,grid)  ( (i)*(grid)->npcol + (j) ) /* Process number at coord(i,j) */
/* Integer division rounded up; evaluates each argument more than once. */
#define CEILING(a,b)    ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
    /* For triangular solves */
/* Loop header over the right-hand sides; `nrhs` must be in scope. */
#define RHS_ITERATE(i)                    \
    for ((i) = 0; (i) < nrhs; ++(i))
/* Offsets computed from `ilsum`, `nrhs` and the per-block header sizes
   XK_H / LSUM_H; wrapped in parentheses so they can be used inside larger
   expressions. */
#define X_BLK(i)                          \
    ( ilsum[(i)] * nrhs + ((i)+1) * XK_H )
#define LSUM_BLK(i)                       \
    ( ilsum[(i)] * nrhs + ((i)+1) * LSUM_H )

#define SuperLU_timer_  SuperLU_timer_dist_
/* Base-2 logarithm; the whole argument is converted to double first. */
#define LOG2(x)   (log10((double) (x)) / log10(2.0))
277
278#if ( VAMPIR>=1 )
279#define VT_TRACEON VT_traceon()
280#define VT_TRACEOFF VT_traceoff()
281#else
282#define VT_TRACEON
283#define VT_TRACEOFF
284#endif
285
286/* Support Windows */
287#ifndef SUPERLU_DIST_EXPORT
288#if MSVC
289#ifdef SUPERLU_DIST_EXPORTS
290#define SUPERLU_DIST_EXPORT __declspec(dllexport)
291#else
292#define SUPERLU_DIST_EXPORT __declspec(dllimport)
293#endif /* SUPERLU_DIST_EXPORTS */
294#else
295#define SUPERLU_DIST_EXPORT
296#endif /* MSVC */
297#endif /* SUPERLU_DIST_EXPORT */
298
299
300/*
301 * CONSTANTS in MAGMA
302 */
#ifndef MAGMA_CONST
#define MAGMA_CONST

// #define DIM_X  32
// #define DIM_Y  16

#define DIM_X  16
#define DIM_Y  16

/* Derived sizes are fully parenthesized so they behave as single values
   inside larger expressions (e.g. `x / BLK_M`, `y % NWARP`). */
#define BLK_M  (DIM_X*4)
#define BLK_N  (DIM_Y*4)
#define BLK_K  (2048/(BLK_M))

#define DIM_XA  DIM_X
#define DIM_YA  DIM_Y
#define DIM_XB  DIM_X
#define DIM_YB  DIM_Y

#define NWARP  (DIM_X*DIM_Y/32)

// // // // // // #define TILE_SIZE  32


#define THR_M ( BLK_M / DIM_X )
#define THR_N ( BLK_N / DIM_Y )

/* Clamped read from offs_d<A>; relies on `min`, `LD<A>` and `offs_d<A>`
   being defined where the macro is used. */
#define fetch(A, m, n, bound)  offs_d##A[min((n)*LD##A+(m), (bound))]
/* Multiply-accumulate into C.
   NOTE(review): this macro name shadows the C99 fma() function declared in
   <math.h>, which this header includes; name kept for compatibility. */
#define fma(A, B, C) ((C) += ((A)*(B)))
#endif
333/*---- end MAGMA ----*/
334
335#ifdef __cplusplus
336extern "C" {
337#endif
338
339
/* Larger of two values; each argument may be evaluated twice.
   The guard tests the macro actually being defined (cmax), so it is still
   provided when some other header has already defined `max`. */
#ifndef cmax
    #define cmax(a,b) ((a) > (b) ? (a) : (b))
#endif
343
344#ifdef __cplusplus
345 }
346#endif
347
348
349/***********************************************************************
350 * New data types
351 ***********************************************************************/
352
353/*
354 * Define the 2D mapping of matrix blocks to process grid.
355 *
356 * Process grid:
357 * Processes are numbered (0 : P-1).
358 * P = Pr x Pc, where Pr, Pc are the number of process rows and columns.
359 * (pr,pc) is the coordinate of IAM; 0 <= pr < Pr, 0 <= pc < Pc.
360 *
361 * Matrix blocks:
362 * Matrix is partitioned according to supernode partitions, both
363 * column and row-wise.
364 * The k-th block columns (rows) contains columns (rows) (s:t), where
365 * s=xsup[k], t=xsup[k+1]-1.
366 * Block A(I,J) contains
367 * rows from (xsup[I]:xsup[I+1]-1) and
368 * columns from (xsup[J]:xsup[J+1]-1)
369 *
370 * Mapping of matrix entry (i,j) to matrix block (I,J):
371 * (I,J) = ( supno[i], supno[j] )
372 *
373 * Mapping of matrix block (I,J) to process grid (pr,pc):
374 * (pr,pc) = ( MOD(I,NPROW), MOD(J,NPCOL) )
375 *
376 * (xsup[nsupers],supno[n]) are replicated on all processors.
377 *
378 */
379
380/*-- Communication subgroup */
381typedef struct {
382 MPI_Comm comm; /* MPI communicator */
383 int Np; /* number of processes */
384 int Iam; /* my process number */
386
387/*-- 2D process grid definition */
388typedef struct {
389 MPI_Comm comm; /* MPI communicator */
390 superlu_scope_t rscp; /* process scope in rowwise, horizontal direction */
391 superlu_scope_t cscp; /* process scope in columnwise, vertical direction */
392 int iam; /* my process number in this grid */
393 int_t nprow; /* number of process rows */
394 int_t npcol; /* number of process columns */
395} gridinfo_t; /* 2D process grid: communicator, row/column scopes, my rank, grid shape */
396
397/*-- 3D process grid definition */
398typedef struct {
399 MPI_Comm comm; /* MPI communicator */
400 superlu_scope_t rscp; /* row scope */
401 superlu_scope_t cscp; /* column scope */
402 superlu_scope_t zscp; /* scope in third dimension */
403 gridinfo_t grid2d; /* for using 2D functions */
404 int iam; /* my process number in this grid */
405 int_t nprow; /* number of process rows */
406 int_t npcol; /* number of process columns */
407 int_t npdep; /* number of replication factor in Z-dimension */
408 int rankorder; /* = 0: Z-major ( default )
409 * e.g. 1x3x4 grid: layer0 layer1 layer2 layer3
410 * 0 3 6 9
411 * 1 4 7 10
412 * 2 5 8 11
413 * = 1: XY-major (need set env. var.: RANKORDER=XY)
414 * e.g. 1x3x4 grid: layer0 layer1 layer2 layer3
415 * 0 1 2 3
416 * 4 5 6 7
417 * 8 9 10 11
418 */
420
421
422/*
423 *-- The structures are determined by SYMBFACT and used thereafter.
424 *
425 * (xsup,supno) describes mapping between supernode and column:
426 * xsup[s] is the leading column of the s-th supernode.
427 * supno[i] is the supernode no to which column i belongs;
428 * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
429 * xsup 0 1 2 4 7 12
430 * Note: dfs will be performed on supernode rep. relative to the new
431 * row pivoting ordering
432 *
433 * This is allocated during symbolic factorization SYMBFACT.
434 */
435typedef struct {
439
440/*
441 *-- The structures are determined by SYMBFACT and used by DDISTRIBUTE.
442 *
443 * (xlsub,lsub): lsub[*] contains the compressed subscript of
444 * rectangular supernodes; xlsub[j] points to the starting
445 * location of the j-th column in lsub[*]. Note that xlsub
446 * is indexed by column.
447 * Storage: original row subscripts
448 *
449 * During the course of sparse LU factorization, we also use
450 * (xlsub,lsub) for the purpose of symmetric pruning. For each
451 * supernode {s,s+1,...,t=s+r} with first column s and last
452 * column t, the subscript set
453 * lsub[j], j=xlsub[s], .., xlsub[s+1]-1
454 * is the structure of column s (i.e. structure of this supernode).
455 * It is used for the storage of numerical values.
456 * Furthermore,
457 * lsub[j], j=xlsub[t], .., xlsub[t+1]-1
458 * is the structure of the last column t of this supernode.
459 * It is for the purpose of symmetric pruning. Therefore, the
460 * structural subscripts can be rearranged without making physical
461 * interchanges among the numerical values.
462 *
463 * However, if the supernode has only one column, then we
464 * only keep one set of subscripts. For any subscript interchange
465 * performed, similar interchange must be done on the numerical
466 * values.
467 *
468 * The last column structures (for pruning) will be removed
469 * after the numerical LU factorization phase.
470 *
471 * (xusub,usub): xusub[i] points to the starting location of column i
472 * in usub[]. For each U-segment, only the row index of first nonzero
473 * is stored in usub[].
474 *
475 * Each U column consists of a number of full segments. Each full segment
476 * starts from a leading nonzero, running up to the supernode (block)
477 * boundary. (Recall that the column-wise supernode partition is also
478 * imposed on the rows.) Because the segment is full, we don't store all
479 * the row indices. Instead, only the leading nonzero index is stored.
480 * The rest can be found together with xsup/supno pair.
481 * For example,
482 * xusub[j+1] - xusub[j] = number of segments in column j.
483 * for any i in usub[],
484 * supno[i] = block number in which i belongs to
485 * xsup[supno[i]+1] = first row of the next block
486 * The nonzeros of this segment are:
487 * i, i+1 ... xsup[supno[i]+1]-1 (only i is stored in usub[])
488 *
489 */
490typedef struct {
491 int_t *lsub; /* compressed L subscripts */
493 int_t *usub; /* compressed U subscripts */
495 int_t nzlmax; /* current max size of lsub */
496 int_t nzumax; /* " " " usub */
497 LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
498 //int_t *llvl; /* keep track of level in L for level-based ILU */
499 //int_t *ulvl; /* keep track of level in U for level-based ILU */
500 int64_t nnzLU; /* number of nonzeros in L+U*/
502
503#if 0 // Sherry: move to precision-dependent file
504/*
505 *-- The structure used to store matrix A of the linear system and
506 * several vectors describing the transformations done to matrix A.
507 *
508 * A (SuperMatrix*)
509 * Matrix A in A*X=B, of dimension (A->nrow, A->ncol).
510 * The number of linear equations is A->nrow. The type of A can be:
511 * Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
512 *
513 * DiagScale (DiagScale_t)
514 * Specifies the form of equilibration that was done.
515 * = NOEQUIL: No equilibration.
516 * = ROW: Row equilibration, i.e., A was premultiplied by diag(R).
517 * = COL: Column equilibration, i.e., A was postmultiplied by diag(C).
518 * = BOTH: Both row and column equilibration, i.e., A was replaced
519 * by diag(R)*A*diag(C).
520 *
521 * R double*, dimension (A->nrow)
522 * The row scale factors for A.
523 * If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R).
524 * If DiagScale = NOEQUIL or COL, R is not defined.
525 *
526 * C double*, dimension (A->ncol)
527 * The column scale factors for A.
528 * If DiagScale = COL or BOTH, A is multiplied on the right by diag(C).
529 * If DiagScale = NOEQUIL or ROW, C is not defined.
530 *
531 * perm_r (int*) dimension (A->nrow)
532 * Row permutation vector which defines the permutation matrix Pr,
533 * perm_r[i] = j means row i of A is in position j in Pr*A.
534 *
535 * perm_c (int*) dimension (A->ncol)
536 * Column permutation vector, which defines the
537 * permutation matrix Pc; perm_c[i] = j means column i of A is
538 * in position j in A*Pc.
539 *
540 */
541typedef struct {
542 DiagScale_t DiagScale;
543 double *R;
544 double *C;
545 int_t *perm_r;
546 int_t *perm_c;
547} ScalePermstruct_t;
548#endif
549
550/*-- Data structure for redistribution of B and X --*/
551typedef struct {
554 int *ptr_to_ibuf, *ptr_to_dbuf;
555
556 /* the following are needed in the hybrid solver PDSLin */
562
563 int_t x2b, b2x;
569
570/*
571 *-- This contains the options used to control the solution process.
572 *
573 * Fact (fact_t)
574 * Specifies whether or not the factored form of the matrix
575 * A is supplied on entry, and if not, how the matrix A should
576 * be factorized.
577 * = DOFACT: The matrix A will be factorized from scratch, and the
578 * factors will be stored in L and U.
579 * = SamePattern: The matrix A will be factorized assuming
580 * that a factorization of a matrix with the same sparsity
581 * pattern was performed prior to this one. Therefore, this
582 * factorization will reuse column permutation vector
583 * ScalePermstruct->perm_c and the column elimination tree
584 * LUstruct->etree.
585 * = SamePattern_SameRowPerm: The matrix A will be factorized
586 * assuming that a factorization of a matrix with the same
587 * sparsity pattern and similar numerical values was performed
588 * prior to this one. Therefore, this factorization will reuse
589 * both row and column scaling factors R and C, both row and
590 * column permutation vectors perm_r and perm_c, and the
591 * data structure set up from the previous symbolic factorization.
592 * = FACTORED: On entry, L, U, perm_r and perm_c contain the
593 * factored form of A. If DiagScale is not NOEQUIL, the matrix
594 * A has been equilibrated with scaling factors R and C.
595 *
596 * Equil (yes_no_t)
597 * Specifies whether to equilibrate the system (scale A's row and
598 * columns to have unit norm).
599 *
600 * DiagInv (yes_no_t)
601 * Specifies whether to invert the diagonal blocks of the LU
602 * triangular matrices.
603 *
604 * ColPerm (colperm_t)
605 * Specifies what type of column permutation to use to reduce fill.
606 * = NATURAL: use the natural ordering
607 * = MMD_ATA: use minimum degree ordering on structure of A'*A
608 * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
609 * = COLAMD: use approximate minimum degree column ordering
610 * = MY_PERMC: use the ordering specified by the user
611 *
612 * Trans (trans_t)
613 * Specifies the form of the system of equations:
614 * = NOTRANS: A * X = B (No transpose)
615 * = TRANS: A**T * X = B (Transpose)
616 * = CONJ: A**H * X = B (Conjugate transpose)
617 *
618 * IterRefine (IterRefine_t)
619 * Specifies whether to perform iterative refinement.
620 * = NO: no iterative refinement
621 * = SINGLE: perform iterative refinement in single precision
622 * = DOUBLE: perform iterative refinement in double precision
623 * = EXTRA: perform iterative refinement in extra precision
624 *
625 * DiagPivotThresh (double, in [0.0, 1.0]) (only for serial SuperLU)
626 * Specifies the threshold used for a diagonal entry to be an
627 * acceptable pivot.
628 *
629 * SymmetricMode (yes_no_t) (only for serial SuperLU)
630 * Specifies whether to use symmetric mode. Symmetric mode gives
631 * preference to diagonal pivots, and uses an (A'+A)-based column
632 * permutation algorithm.
633 *
634 * PivotGrowth (yes_no_t) (only for serial SuperLU)
635 * Specifies whether to compute the reciprocal pivot growth.
636 *
637 * ConditionNumber (yes_no_t) (only for serial SuperLU)
638 * Specifies whether to compute the reciprocal condition number.
639 *
640 * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU in serial SuperLU)
641 * Specifies whether to permute rows of the original matrix.
642 * = NO: not to permute the rows
643 * = LargeDiag: make the diagonal large relative to the off-diagonal
644 * = MY_PERMR: use the permutation given by the user
645 *
646 * ILU_DropRule (int) (only for serial SuperLU)
647 * Specifies the dropping rule:
648 * = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
649 * = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
650 * = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
651 * p = gamma * nnz(A(:,j)).
652 * = DROP_AREA: Variation of ILUTP, for j-th column, use
653 * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
654 * = DROP_DYNAMIC: Modify the threshold tau during factorization:
655 * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
656 * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
657 * Otherwise
658 * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
659 * tau_U(j) uses the similar rule.
660 * NOTE: the thresholds used by L and U are separate.
661 * = DROP_INTERP: Compute the second dropping threshold by
662 * interpolation instead of sorting (default).
663 * In this case, the actual fill ratio is not
664 * guaranteed to be smaller than gamma.
665 * Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
666 * ( Default: DROP_BASIC | DROP_AREA )
667 *
668 * ILU_DropTol (double) (only for serial SuperLU)
669 * numerical threshold for dropping.
670 *
671 * ILU_FillFactor (double) (only for serial SuperLU)
672 * Gamma in the secondary dropping.
673 *
674 * ILU_Norm (norm_t) (only for serial SuperLU)
675 * Specify which norm to use to measure the row size in a
676 * supernode: infinity-norm, 1-norm, or 2-norm.
677 *
678 * ILU_FillTol (double) (only for serial SuperLU)
679 * numerical threshold for zero pivot perturbation.
680 *
681 * ILU_MILU (milu_t) (only for serial SuperLU)
682 * Specifies which version of MILU to use.
683 *
684 * ILU_MILU_Dim (double)
685 * Dimension of the PDE if available.
686 *
687 * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
688 * Specifies whether to replace the tiny diagonals by
689 * sqrt(epsilon)*||A|| during LU factorization.
690 *
691 * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
692 * Specifies whether the initialization has been performed to the
693 * triangular solve.
694 *
695 * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
696 * Specifies whether the initialization has been performed to the
697 * sparse matrix-vector multiplication routine needed in iterative
698 * refinement.
699 *
700 * num_lookaheads (int) (only for SuperLU_DIST)
701 * Specifies the number of levels in the look-ahead factorization
702 *
703 * lookahead_etree (yes_no_t) (only for SuperLU_DIST)
704 * Specifies whether to use the elimination tree computed from the
705 * serial symbolic factorization to perform scheduling.
706 *
707 * SymPattern (yes_no_t) (only for SuperLU_DIST)
708 * Gives the scheduling algorithm a hint whether the matrix
709 * would have symmetric pattern.
710 *
711 */
712typedef struct {
725 double ILU_DropTol; /* threshold for dropping */
726 double ILU_FillFactor; /* gamma in the secondary dropping */
727 norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */
728 double ILU_FillTol; /* threshold for zero pivot perturbation */
730 double ILU_MILU_Dim; /* Dimension of PDE (if available) */
732 yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */
736 //int nnzL, nnzU; /* used to store nnzs for now */
737 int num_lookaheads; /* num of levels in look-ahead */
738 yes_no_t lookahead_etree; /* use etree computed from the
739 serial symbolic factorization */
740 yes_no_t SymPattern; /* symmetric factorization */
741 yes_no_t Algo3d; /* use 3D factorization/solve algorithms */
743
744typedef struct {
745 float for_lu;
746 float total;
748 int64_t nnzL, nnzU;
750
751/*-- Auxiliary data type used in PxGSTRS/PxGSTRS1. */
752typedef struct {
753 int_t lbnum; /* Row block number (local). */
754 int_t indpos; /* Starting position in Uindex[]. */
756
757/*
758 *-- The new structures added in the hybrid GPU + OpenMP + MPI code.
759 */
760typedef struct {
765 int_t eo; /* order of elimination. For 3D algorithm */
769
770typedef struct {
773 int_t eo; /* order of elimination, for 3D code */
778
779typedef struct
780{
781 int id, key;
782 void *next;
783} etree_node;
784
786{
787 int ind;
788 int val;
789};
790
793/*==== For 3D code ====*/
794
795/* return the mpi_tag assuming 5 pairs of communications and MPI_TAG_UB >= 5 *
796 * for each supernodal column, the five communications are: *
797 * 0,1: for sending L to "right" *
798 * 2,3: for sending off-diagonal blocks of U "down" *
799 * 4 : for sending the diagonal block down (in pxgstrf2) */
800// int tag_ub;
801// #define SLU_MPI_TAG(id,num) ( (5*(num)+id) % tag_ub )
802
803// #undef SLU_MPI_TAG
/* Defining my own MPI tags. */
/* Return the MPI tag for communication `id` of supernodal column `num`.
 * Six tags are reserved per column (hence the factor 6), and the result is
 * wrapped modulo `tag_ub`, which must be in scope at the point of use:
 *    0,1: for sending L to "right"
 *    2,3: for sending off-diagonal blocks of U "down"
 *    4  : for sending the diagonal block down (in pxgstrf2)
 *    5  : for sending the diagonal L block right () : added by piyush
 * Both arguments are parenthesized so arbitrary expressions expand correctly. */
#define SLU_MPI_TAG(id,num) ( (6*(num)+(id)) % tag_ub )
812
813/*structs for quick look up */
814typedef struct
815{
820
821typedef struct
822{
827
828
829//global variable
830extern double CPU_CLOCK_RATE;
831
832typedef struct
833{
837
838typedef struct
839{
845 int_t* IbcastPanel_L; /*I bcast and recv placed for the k-th L panel*/
846 int_t* IbcastPanel_U; /*I bcast and recv placed for the k-th U panel*/
847 int_t* numChildLeft; /*number of children left to be factored*/
848 int_t* gpuLUreduced; /*New for GPU acceleration*/
850
851typedef struct
852{
867
868typedef struct{
875 int_t depth; // distance from the top
876 double weight; // weight of the supernode
877 double iWeight; // weight of the whole subtree below
878 double scuWeight; // weight of schur complement update = max|n_k||L_k||U_k|
879} treeList_t;
880
881typedef struct
882{
883 int_t numLvl; // number of level in tree;
884 int_t* eTreeTopLims; // boundaries of each level of size
885 int_t* myIperm; // Iperm for my tree size nsupers;
886
888
889typedef struct
890{
891 int_t* setree; // global supernodal elimination tree
893} gEtreeInfo_t;
894
895typedef enum treePartStrat{
896 ND, // nested dissection ordering or natural ordering
897 GD // greedy load balance stregy
899
900typedef struct
901{
902 /* data */
903 int_t nNodes; // total number of nodes
904 int_t* nodeList; // list of nodes, should be in order of factorization
905#if 0 // Sherry: the following array is used on rForest_t. ???
906 int_t* treeHeads;
907#endif
908 /*topological information about the tree*/
909 int_t numLvl; // number of Topological levels in the forest
910 int_t numTrees; // number of tree in the forest
912#if 0 // Sherry fix: the following two structures are in treeTopoInfo_t. ???
913 int_t* eTreeTopLims; // boundaries of each level of size
914 int_t* myIperm; // Iperm for my tree size nsupers;
915#endif
916
917 /*information about load balance*/
918 double weight; // estimated cost
919 double cost; // measured cost
920
921} sForest_t;
922
923typedef struct
924{
925 /* data */
930 MPI_Request* recv_req;
931 MPI_Request* recv_requ;
932 MPI_Request* send_req;
933 MPI_Request* send_requ;
935
936typedef struct
937{
943
945
946typedef struct
947{
948 int* msgcnt;
950} msgs_t;
951
952typedef struct xtrsTimer_t
953{
962 double tfs_comm;
965 double tbs_comm;
968
969 // counters for communication and computation volume
970
975
976 double ppXmem; // perprocess X-memory
978
979/*==== end For 3D code ====*/
980
981/*====================*/
982
983/***********************************************************************
984 * Function prototypes
985 ***********************************************************************/
986
987#ifdef __cplusplus
988extern "C" {
989#endif
990
991extern void superlu_gridinit(MPI_Comm, int, int, gridinfo_t *);
992extern void superlu_gridmap(MPI_Comm, int, int, int [], int, gridinfo_t *);
993extern void superlu_gridexit(gridinfo_t *);
994extern void superlu_gridinit3d(MPI_Comm Bcomm, int nprow, int npcol, int npdep,
995 gridinfo3d_t *grid) ;
996extern void superlu_gridexit3d(gridinfo3d_t *grid);
997
1008 SuperMatrix*);
1009extern int sp_symetree_dist(int_t *, int_t *, int_t *, int_t, int_t *);
1010extern int sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *);
1011extern void get_perm_c_dist(int_t, int_t, SuperMatrix *, int_t *);
1012extern void at_plus_a_dist(const int_t, const int_t, int_t *, int_t *,
1013 int_t *, int_t **, int_t **);
1014extern int genmmd_dist_(int_t *, int_t *, int_t *a,
1015 int_t *, int_t *, int_t *, int_t *,
1016 int_t *, int_t *, int_t *, int_t *, int_t *);
1017extern void bcast_tree(void *, int, MPI_Datatype, int, int,
1018 gridinfo_t *, int, int *);
1024 Glu_freeable_t *);
1026extern void countnz_dist (const int_t, int_t *, int_t *, int_t *,
1028extern int64_t fixupL_dist (const int_t, const int_t *, Glu_persist_t *,
1029 Glu_freeable_t *);
1030extern int_t *TreePostorder_dist (int_t, int_t *);
1031extern float smach_dist(char *);
1032extern double dmach_dist(char *);
1033extern void *superlu_malloc_dist (size_t);
1034extern void superlu_free_dist (void*);
1035extern int *int32Malloc_dist (int);
1036extern int *int32Calloc_dist (int);
1037extern int_t *intMalloc_dist (int_t);
1038extern int_t *intCalloc_dist (int_t);
1039extern int mc64id_dist(int *);
1040extern void arrive_at_ublock (int_t, int_t *, int_t *, int_t *,
1041 int_t *, int_t *, int_t, int_t,
1042 int_t *, int_t *, int_t *, gridinfo_t *);
1044 gridinfo_t *, int_t *, int_t*);
1045
1046/* Auxiliary routines */
1047extern double SuperLU_timer_ ();
1048extern void superlu_abort_and_exit_dist(char *);
1049extern int sp_ienv_dist (int);
1050extern void ifill_dist (int_t *, int_t, int_t);
1051extern void super_stats_dist (int_t, int_t *);
1053 int_t **, int_t **);
1055extern int xerr_dist (char *, int *);
1056extern void pxerr_dist (char *, gridinfo_t *, int_t);
1057extern void PStatInit(SuperLUStat_t *);
1058extern void PStatFree(SuperLUStat_t *);
1060extern void log_memory(int64_t, SuperLUStat_t *);
1061extern void print_memorylog(SuperLUStat_t *, char *);
1062extern int superlu_dist_GetVersionNumber(int *, int *, int *);
1063extern void quickSort( int_t*, int_t, int_t, int_t);
1064extern void quickSortM( int_t*, int_t, int_t, int_t, int_t, int_t);
1065extern int_t partition( int_t*, int_t, int_t, int_t);
1067
1068/* Prototypes for parallel symbolic factorization */
1069extern float symbfact_dist
1070(int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *,
1071 Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, superlu_dist_mem_usage_t *);
1072
1073/* Get the column permutation using parmetis */
1074extern float get_perm_c_parmetis
1075(SuperMatrix *, int_t *, int_t *, int, int,
1076 int_t **, int_t **, gridinfo_t *, MPI_Comm *);
1077
1078/* Auxiliary routines for memory expansions used during
1079 the parallel symbolic factorization routine */
1080
1084
1088
1092
1095
1096#ifdef ISORT
1097extern void isort (int_t N, int_t *ARRAY1, int_t *ARRAY2);
1098extern void isort1 (int_t N, int_t *ARRAY);
1099#else
1100int superlu_sort_perm (const void *arg1, const void *arg2)
1101{
1102 const int_t *val1 = (const int_t *) arg1;
1103 const int_t *val2 = (const int_t *) arg2;
1104 return (*val2 < *val1);
1105}
1106#endif
1107
1108#ifdef GPU_ACC /* GPU related */
1109extern void gemm_division_cpu_gpu (int *, int *, int *, int,
1110 int, int, int *, int);
1111extern int_t get_gpublas_nb ();
1112extern int_t get_num_gpu_streams ();
1113#endif
1114
1115extern double estimate_cpu_time(int m, int n , int k);
1116
1117extern int get_thread_per_process();
1118extern int_t get_max_buffer_size ();
1119extern int_t get_min (int_t *, int_t);
1120extern int compare_pair (const void *, const void *);
1121extern int_t static_partition (struct superlu_pair *, int_t, int_t *, int_t,
1122 int_t *, int_t *, int);
1123extern int get_acc_offload();
1124
1125
1126/* Routines for debugging */
/* Diagnostic print of segment info after panel_dfs(). */
1127extern void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *);
/* Check whether repfnz[] == EMPTY after reset. */
1128extern void check_repfnz_dist(int_t, int_t, int_t, int_t *);
1129extern int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *);
/* Array printers: a char* argument, an element count, then the array.
   NOTE(review): the char* is presumably a label/name for the output and the
   numeric suffix presumably the number of items per line -- confirm in util.c. */
1130extern void PrintDouble5(char *, int_t, double *);
1131extern void PrintInt10(char *, int_t, int_t *);
1132extern void PrintInt32(char *, int, int *);
/* Same argument lists as above with a leading FILE* argument; these return int. */
1133extern int file_PrintInt10(FILE *, char *, int_t, int_t *);
1134extern int file_PrintInt32(FILE *, char *, int, int *);
1135extern int file_PrintLong10(FILE *, char *, int_t, int_t *);
1136
1137/* Routines for Async_tree communication*/
1138
1139#ifndef __SUPERLU_ASYNC_TREE /* allow multiple inclusions */
1140#define __SUPERLU_ASYNC_TREE
1141typedef struct
1142{
1143 MPI_Request sendRequests_[2];
1144 MPI_Comm comm_;
1147 int myDests_[2];
1150 int tag_;
1152 MPI_Datatype type_;
1153} C_Tree;
1154
1155#ifndef DEG_TREE
1156#define DEG_TREE 2
1157#endif
1158
1159#endif
1160
1161extern void C_RdTree_Create(C_Tree* tree, MPI_Comm comm, int* ranks, int rank_cnt, int msgSize, char precision);
1162extern void C_RdTree_Nullify(C_Tree* tree);
1163extern yes_no_t C_RdTree_IsRoot(C_Tree* tree);
1164extern void C_RdTree_forwardMessageSimple(C_Tree* Tree, void* localBuffer, int msgSize);
1165extern void C_RdTree_waitSendRequest(C_Tree* Tree);
1166
1167extern void C_BcTree_Create(C_Tree* tree, MPI_Comm comm, int* ranks, int rank_cnt, int msgSize, char precision);
1168extern void C_BcTree_Nullify(C_Tree* tree);
1169extern yes_no_t C_BcTree_IsRoot(C_Tree* tree);
1170extern void C_BcTree_forwardMessageSimple(C_Tree* tree, void* localBuffer, int msgSize);
1171extern void C_BcTree_waitSendRequest(C_Tree* tree);
1172
1173/*==== For 3D code ====*/
1174
1175extern void DistPrint(char* function_name, double value, char* Units, gridinfo_t* grid);
1176extern void DistPrint3D(char* function_name, double value, char* Units, gridinfo3d_t* grid3d);
1177extern void treeImbalance3D(gridinfo3d_t *grid3d, SCT_t* SCT);
1178extern void SCT_printComm3D(gridinfo3d_t *grid3d, SCT_t* SCT);
1179
1180// permutation from superLU default
1182 int_t *etree, Glu_persist_t *Glu_persist,
1183 int_t** Lrowind_bc_ptr, int_t** Ufstnz_br_ptr,
1184 gridinfo_t *);
1185
1186/* Manipulate counters */
1187extern void SCT_init(SCT_t*);
1188extern void SCT_print(gridinfo_t *grid, SCT_t* SCT);
1189extern void SCT_print3D(gridinfo3d_t *grid3d, SCT_t* SCT);
1190extern void SCT_free(SCT_t*);
1191
1192extern treeList_t* setree2list(int_t nsuper, int_t* setree );
1193extern int free_treelist(int_t nsuper, treeList_t* treeList);
1194
1195// int_t calcTreeWeight(int_t nsupers, treeList_t* treeList, int_t* xsup);
1196extern int_t calcTreeWeight(int_t nsupers, int_t*setree, treeList_t* treeList, int_t* xsup);
1197extern int_t getDescendList(int_t k, int_t*dlist, treeList_t* treeList);
1198extern int_t getCommonAncestorList(int_t k, int_t* alist, int_t* seTree, treeList_t* treeList);
1199extern int_t getCommonAncsCount(int_t k, treeList_t* treeList);
1200extern int_t* getPermNodeList(int_t nnode, // number of nodes
1201 int_t* nlist, int_t* perm_c_sup,int_t* iperm_c_sup);
1202extern int_t* getEtreeLB(int_t nnodes, int_t* perm_l, int_t* gTopOrder);
1203extern int_t* getSubTreeRoots(int_t k, treeList_t* treeList);
1204// int_t* treeList2perm(treeList_t* , ..);
1205extern int_t* merg_perms(int_t nperms, int_t* nnodes, int_t** perms);
1206// returns a concatenated permutation for three permutation arrays
1207
1208extern int_t* getGlobal_iperm(int_t nsupers, int_t nperms, int_t** perms,
1209 int_t* nnodes);
1210extern int_t log2i(int_t index);
1211extern int_t *supernodal_etree(int_t nsuper, int_t * etree, int_t* supno, int_t *xsup);
1212extern int_t testSubtreeNodelist(int_t nsupers, int_t numList, int_t** nodeList, int_t* nodeCount);
1213extern int_t testListPerm(int_t nodeCount, int_t* nodeList, int_t* permList, int_t* gTopLevel);
1214
1215/*takes supernodal elimination tree and for each
1216 supernode calculates "level" in elimination tree*/
1217extern int_t* topological_ordering(int_t nsuper, int_t* setree);
1218extern int_t* Etree_LevelBoundry(int_t* perm,int_t* tsort_etree, int_t nsuper);
1219
1220/* calculates the boundaries of the topological levels */
1221extern int_t* calculate_num_children(int_t nsuper, int_t* setree);
1222extern void Print_EtreeLevelBoundry(int_t *Etree_LvlBdry, int_t max_level, int_t nsuper);
1223extern void print_etree_leveled(int_t *setree, int_t* tsort_etree, int_t nsuper);
1224extern void print_etree(int_t *setree, int_t* iperm, int_t nsuper);
1225extern int_t printFileList(char* sname, int_t nnodes, int_t*dlist, int_t*setree);
1226int* getLastDepBtree( int_t nsupers, treeList_t* treeList);
1227
1228/* Returns an array R of size maxLevel whose entries are either 0 or 1.
1229 If R[i] = 1, then Tree[level-i] is set to zero so that it only
1230 accumulates the results. */
1231extern int_t* getReplicatedTrees( gridinfo3d_t* grid3d);
1232
1233/* returns the indices in gNodeList of the trees that belong to my layer */
1234extern int_t* getGridTrees( gridinfo3d_t* grid3d);
1235
1236
1237/*returns global nodelist*/
1238extern int_t** getNodeList(int_t maxLvl, int_t* setree, int_t* nnodes,
1239 int_t* treeHeads, treeList_t* treeList);
1240
1241/* calculate number of nodes in subtrees starting from treeHead[i]*/
1242extern int_t* calcNumNodes(int_t maxLvl, int_t* treeHeads, treeList_t* treeList);
1243
1244/*Returns list of (last) node of the trees */
1245extern int_t* getTreeHeads(int_t maxLvl, int_t nsupers, treeList_t* treeList);
1246
1247extern int_t* getMyIperm(int_t nnodes, int_t nsupers, int_t* myPerm);
1248
1249extern int_t* getMyTopOrder(int_t nnodes, int_t* myPerm, int_t* myIperm, int_t* setree );
1250
1251extern int_t* getMyEtLims(int_t nnodes, int_t* myTopOrder);
1252
1253
1254extern treeTopoInfo_t getMyTreeTopoInfo(int_t nnodes, int_t nsupers,
1255 int_t* myPerm,int_t* setree);
1256
1257extern sForest_t** getNestDissForests( int_t maxLvl, int_t nsupers, int_t*setree, treeList_t* treeList);
1258
1259extern int_t** getTreePermForest( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
1260 sForest_t* sForests,
1261 int_t* perm_c_supno, int_t* iperm_c_supno,
1262 gridinfo3d_t* grid3d);
1263extern int_t** getTreePermFr( int_t* myTreeIdxs,
1264 sForest_t** sForests, gridinfo3d_t* grid3d);
1265extern int_t* getMyNodeCountsFr(int_t maxLvl, int_t* myTreeIdxs,
1266 sForest_t** sForests);
1267extern int_t** getNodeListFr(int_t maxLvl, sForest_t** sForests);
1268extern int_t* getNodeCountsFr(int_t maxLvl, sForest_t** sForests);
1269// int_t* getNodeToForstMap(int_t nsupers, sForest_t** sForests, gridinfo3d_t* grid3d);
1270extern int* getIsNodeInMyGrid(int_t nsupers, int_t maxLvl, int_t* myNodeCount, int_t** treePerm);
1271extern void printForestWeightCost(sForest_t** sForests, SCT_t* SCT, gridinfo3d_t* grid3d);
1272extern sForest_t** getGreedyLoadBalForests( int_t maxLvl, int_t nsupers, int_t* setree, treeList_t* treeList);
1273extern sForest_t** getForests( int_t maxLvl, int_t nsupers, int_t*setree, treeList_t* treeList);
1274
1275 /* from trfAux.h */
1276extern int_t getBigUSize(int_t nsupers, gridinfo_t *grid, int_t **Lrowind_bc_ptr);
1277extern void getSCUweight(int_t nsupers, treeList_t* treeList, int_t* xsup,
1278 int_t** Lrowind_bc_ptr, int_t** Ufstnz_br_ptr,
1279 gridinfo3d_t * grid3d);
1280extern int Wait_LUDiagSend(int_t k, MPI_Request *U_diag_blk_send_req,
1281 MPI_Request *L_diag_blk_send_req,
1282 gridinfo_t *grid, SCT_t *SCT);
1283
1284extern int getNsupers(int n, Glu_persist_t *Glu_persist);
1285extern int set_tag_ub();
1286extern int getNumThreads(int);
1287extern int_t num_full_cols_U(int_t kk, int_t **Ufstnz_br_ptr, int_t *xsup,
1288 gridinfo_t *, int_t *, int_t *);
1289
1290#if 0 // Sherry: conflicting with existing routine
1291extern int_t estimate_bigu_size(int_t nsupers, int_t ldt, int_t**Ufstnz_br_ptr,
1292 Glu_persist_t *, gridinfo_t*, int_t* perm_u);
1293#endif
1294
1295extern int_t* getFactPerm(int_t);
1296extern int_t* getFactIperm(int_t*, int_t);
1297
1298extern int_t initCommRequests(commRequests_t* comReqs, gridinfo_t * grid);
1299extern int_t initFactStat(int_t nsupers, factStat_t* factStat);
1300extern int freeFactStat(factStat_t* factStat);
1302extern int freeFactNodelists(factNodelists_t* fNlists);
1303extern int_t initMsgs(msgs_t* msgs);
1305extern commRequests_t** initCommRequestsArr(int_t mxLeafNode, int_t ldt, gridinfo_t* grid);
1306extern int freeCommRequestsArr(int_t mxLeafNode, commRequests_t** comReqss);
1307
1308extern msgs_t** initMsgsArr(int_t numLA);
1309extern int freeMsgsArr(int_t numLA, msgs_t **msgss);
1310
1313
1314 /* from sec_structs.h */
1315extern int Cmpfunc_R_info (const void * a, const void * b);
1316extern int Cmpfunc_U_info (const void * a, const void * b);
1317extern int sort_R_info( Remain_info_t* Remain_info, int n );
1318extern int sort_U_info( Ublock_info_t* Ublock_info, int n );
1319extern int sort_R_info_elm( Remain_info_t* Remain_info, int n );
1320extern int sort_U_info_elm( Ublock_info_t* Ublock_info, int n );
1321
1322 /* from pdgstrs.h */
1323extern void printTRStimer(xtrsTimer_t *xtrsTimer, gridinfo3d_t *grid3d);
1324extern void initTRStimer(xtrsTimer_t *xtrsTimer, gridinfo_t *grid);
1325
1326 /* from p3dcomm.c */
1327extern int_t** getTreePerm( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
1328 int_t* nodeCount, int_t** nodeList,
1329 int_t* perm_c_supno, int_t* iperm_c_supno,
1330 gridinfo3d_t* grid3d);
1331extern int_t* getMyNodeCounts(int_t maxLvl, int_t* myTreeIdxs, int_t* gNodeCount);
1332extern int_t checkIntVector3d(int_t* vec, int_t len, gridinfo3d_t* grid3d);
1333extern int_t reduceStat(PhaseType PHASE, SuperLUStat_t *stat, gridinfo3d_t * grid3d);
1334
1335 /* from communication_aux.h */
1336extern int_t Wait_LSend(int_t k, gridinfo_t *grid, int **ToSendR,
1337 MPI_Request *s, SCT_t*);
1338extern int_t Wait_USend(MPI_Request *, gridinfo_t *, SCT_t *);
1339extern int_t Check_LRecv(MPI_Request*, int* msgcnt);
1340extern int_t Wait_UDiagBlockSend(MPI_Request *, gridinfo_t *, SCT_t *);
1341extern int_t Wait_LDiagBlockSend(MPI_Request *, gridinfo_t *, SCT_t *);
1342extern int_t Wait_UDiagBlock_Recv(MPI_Request *, SCT_t *);
1343extern int_t Test_UDiagBlock_Recv(MPI_Request *, SCT_t *);
1344extern int_t Wait_LDiagBlock_Recv(MPI_Request *, SCT_t *);
1345extern int_t Test_LDiagBlock_Recv(MPI_Request *, SCT_t *);
1346extern int_t LDiagBlockRecvWait( int_t k, int_t* factored_U, MPI_Request *, gridinfo_t *);
1347
/*
 * Both routines take no arguments and return int; their definitions are in
 * util.c.  They are declared with (void) rather than an empty parameter
 * list so the compiler rejects calls that pass arguments: in C before C23
 * an empty "()" declares a function with unspecified parameters.
 */
extern int getnGPUStreams(void);
extern int get_mpi_process_per_gpu(void);
1350
1351/*=====================*/
1352
1353#ifdef __cplusplus
1354 }
1355#endif
1356
1357#endif /* __SUPERLU_DEFS */
void superlu_free_dist(void *)
Definition: memory.c:168
void ifill_dist(int_t *, int_t, int_t)
Fills an integer array with a given value.
Definition: util.c:488
int_t Wait_USend(MPI_Request *, gridinfo_t *, SCT_t *)
Definition: communication_aux.c:56
void superlu_gridinit3d(MPI_Comm Bcomm, int nprow, int npcol, int npdep, gridinfo3d_t *grid)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid3d.c:25
void PrintDouble5(char *, int_t, double *)
int_t num_full_cols_U(int_t kk, int_t **Ufstnz_br_ptr, int_t *xsup, gridinfo_t *, int_t *, int_t *)
Definition: util.c:944
int_t ** getTreePermForest(int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t *sForests, int_t *perm_c_supno, int_t *iperm_c_supno, gridinfo3d_t *grid3d)
void print_options_dist(superlu_dist_options_t *)
Print the options setting.
Definition: util.c:228
void C_BcTree_forwardMessageSimple(C_Tree *tree, void *localBuffer, int msgSize)
Definition: comm_tree.c:75
treePartStrat
Definition: superlu_defs.h:895
@ GD
Definition: superlu_defs.h:897
@ ND
Definition: superlu_defs.h:896
int getNsupers(int n, Glu_persist_t *Glu_persist)
Definition: trfAux.c:42
void superlu_gridexit3d(gridinfo3d_t *grid)
Definition: superlu_grid3d.c:256
int xerr_dist(char *, int *)
Definition: xerr_dist.c:26
void treeImbalance3D(gridinfo3d_t *grid3d, SCT_t *SCT)
Definition: sec_structs.c:532
int_t psymbfact_LUXpand_RL(int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:384
void print_etree(int_t *setree, int_t *iperm, int_t nsuper)
Definition: supernodal_etree.c:1045
int_t log2i(int_t index)
Definition: supernodal_etree.c:17
int_t partitionM(int_t *, int_t, int_t, int_t, int_t, int_t)
Definition: util.c:1125
void Destroy_CompCol_Matrix_dist(SuperMatrix *)
Definition: util.c:34
int_t getDescendList(int_t k, int_t *dlist, treeList_t *treeList)
Definition: supernodal_etree.c:259
void arrive_at_ublock(int_t, int_t *, int_t *, int_t *, int_t *, int_t *, int_t, int_t, int_t *, int_t *, int_t *, gridinfo_t *)
Definition: util.c:890
int_t * getMyNodeCountsFr(int_t maxLvl, int_t *myTreeIdxs, sForest_t **sForests)
Definition: supernodalForest.c:276
int_t Wait_UDiagBlockSend(MPI_Request *, gridinfo_t *, SCT_t *)
Definition: communication_aux.c:112
int_t * getMyTopOrder(int_t nnodes, int_t *myPerm, int_t *myIperm, int_t *setree)
Definition: supernodal_etree.c:852
int_t * getFactPerm(int_t)
Definition: trfAux.c:208
int free_treelist(int_t nsuper, treeList_t *treeList)
Definition: supernodal_etree.c:114
void C_BcTree_Nullify(C_Tree *tree)
Definition: comm_tree.c:56
int_t * getEtreeLB(int_t nnodes, int_t *perm_l, int_t *gTopOrder)
Definition: supernodal_etree.c:339
int_t initCommRequests(commRequests_t *comReqs, gridinfo_t *grid)
Definition: treeFactorization.c:227
int_t * calcNumNodes(int_t maxLvl, int_t *treeHeads, treeList_t *treeList)
Definition: supernodal_etree.c:733
int_t * getFactIperm(int_t *, int_t)
Definition: trfAux.c:221
void DistPrint(char *function_name, double value, char *Units, gridinfo_t *grid)
Definition: sec_structs.c:313
int_t * getPerm_c_supno(int_t nsupers, superlu_dist_options_t *, int_t *etree, Glu_persist_t *Glu_persist, int_t **Lrowind_bc_ptr, int_t **Ufstnz_br_ptr, gridinfo_t *)
Definition: trfAux.c:234
int_t * intMalloc_dist(int_t)
Definition: memory.c:219
int_t reduceStat(PhaseType PHASE, SuperLUStat_t *stat, gridinfo3d_t *grid3d)
Definition: util.c:1256
int_t * merg_perms(int_t nperms, int_t *nnodes, int_t **perms)
Definition: supernodal_etree.c:482
int sp_ienv_dist(int)
Definition: sp_ienv.c:73
int_t static_partition(struct superlu_pair *, int_t, int_t *, int_t, int_t *, int_t *, int)
Definition: util.c:864
int_t Wait_LDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:172
sForest_t ** getNestDissForests(int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
Definition: supernodalForest.c:62
int_t ** getNodeListFr(int_t maxLvl, sForest_t **sForests)
Definition: supernodalForest.c:232
void SCT_free(SCT_t *)
Definition: sec_structs.c:294
int_t get_min(int_t *, int_t)
Definition: util.c:847
double estimate_cpu_time(int m, int n, int k)
Definition: acc_aux.c:214
void countnz_dist(const int_t, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: util.c:95
int sort_R_info(Remain_info_t *Remain_info, int n)
Definition: sec_structs.c:54
int_t * getTreeHeads(int_t maxLvl, int_t nsupers, treeList_t *treeList)
Definition: supernodal_etree.c:705
void log_memory(int64_t, SuperLUStat_t *)
Definition: util.c:783
int freeMsgsArr(int_t numLA, msgs_t **msgss)
Definition: treeFactorization.c:356
sForest_t ** getForests(int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
Definition: supernodalForest.c:29
int_t getCommonAncestorList(int_t k, int_t *alist, int_t *seTree, treeList_t *treeList)
Definition: supernodal_etree.c:290
void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *)
Diagnostic print of segment info after panel_dfs().
Definition: util.c:276
void superlu_abort_and_exit_dist(char *)
Definition: memory.c:48
int sort_R_info_elm(Remain_info_t *Remain_info, int n)
Definition: sec_structs.c:82
void Destroy_SuperNode_Matrix_dist(SuperMatrix *)
Definition: util.c:61
int_t initFactStat(int_t nsupers, factStat_t *factStat)
Definition: treeFactorization.c:274
int_t * getReplicatedTrees(gridinfo3d_t *grid3d)
Definition: supernodal_etree.c:815
void SCT_init(SCT_t *)
Definition: sec_structs.c:165
int sp_symetree_dist(int_t *, int_t *, int_t *, int_t, int_t *)
Symmetric elimination tree.
Definition: etree.c:156
int_t getBigUSize(int_t nsupers, gridinfo_t *grid, int_t **Lrowind_bc_ptr)
Definition: trfAux.c:162
int_t symbfact_SubXpand(int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *)
Definition: memory.c:433
void C_BcTree_Create(C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision)
Definition: comm_tree.c:5
int_t symbfact_SubFree(Glu_freeable_t *)
Definition: memory.c:479
int freeFactNodelists(factNodelists_t *fNlists)
Definition: treeFactorization.c:327
float smach_dist(char *)
Definition: smach_dist.c:16
int_t ** getTreePerm(int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, int_t *perm_c_supno, int_t *iperm_c_supno, gridinfo3d_t *grid3d)
Definition: util.c:1190
#define SuperLU_timer_
Definition: superlu_defs.h:275
int compare_pair(const void *, const void *)
Definition: util.c:799
void Destroy_CompCol_Permuted_dist(SuperMatrix *)
A is of type Stype==NCP.
Definition: util.c:73
int file_PrintInt10(FILE *, char *, int_t, int_t *)
Definition: util.c:628
int_t partition(int_t *, int_t, int_t, int_t)
Definition: util.c:1057
void super_stats_dist(int_t, int_t *)
Definition: util.c:542
int_t * getMyEtLims(int_t nnodes, int_t *myTopOrder)
Definition: supernodal_etree.c:886
void PStatPrint(superlu_dist_options_t *, SuperLUStat_t *, gridinfo_t *)
Definition: util.c:308
void get_perm_c_dist(int_t, int_t, SuperMatrix *, int_t *)
Definition: get_perm_c.c:464
int_t * getNodeCountsFr(int_t maxLvl, sForest_t **sForests)
Definition: supernodalForest.c:214
yes_no_t C_BcTree_IsRoot(C_Tree *tree)
Definition: comm_tree.c:71
void PStatInit(SuperLUStat_t *)
Definition: util.c:290
void C_RdTree_waitSendRequest(C_Tree *Tree)
Definition: comm_tree.c:186
void print_sp_ienv_dist(superlu_dist_options_t *)
Print the blocking parameters.
Definition: util.c:252
int_t testSubtreeNodelist(int_t nsupers, int_t numList, int_t **nodeList, int_t *nodeCount)
Definition: supernodal_etree.c:407
int_t symbfact(superlu_dist_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: symbfact.c:82
int Wait_LUDiagSend(int_t k, MPI_Request *U_diag_blk_send_req, MPI_Request *L_diag_blk_send_req, gridinfo_t *grid, SCT_t *SCT)
Definition: communication_aux.c:195
int sort_U_info_elm(Ublock_info_t *Ublock_info, int n)
Definition: sec_structs.c:90
void PStatFree(SuperLUStat_t *)
Definition: util.c:480
float get_perm_c_parmetis(SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *)
Definition: get_perm_c_parmetis.c:104
int mc64id_dist(int *)
Definition: mc64ad_dist.c:57
int_t QuerySpace_dist(int_t, int_t, Glu_freeable_t *, superlu_dist_mem_usage_t *)
Definition: memory.c:617
int_t Test_UDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:161
void at_plus_a_dist(const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **)
Definition: get_perm_c.c:301
double dmach_dist(char *)
Definition: dmach_dist.c:16
void get_diag_procs(int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **)
Definition: util.c:495
int sp_coletree_dist(int_t *, int_t *, int_t *, int_t, int_t, int_t *)
Nonsymmetric elimination tree.
Definition: etree.c:223
int_t Wait_LDiagBlockSend(MPI_Request *, gridinfo_t *, SCT_t *)
Definition: communication_aux.c:132
int_t Trs2_InitUblock_info(int_t klst, int_t nb, Ublock_info_t *, int_t *usub, Glu_persist_t *, SuperLUStat_t *)
Definition: trfAux.c:1172
int_t ** getNodeList(int_t maxLvl, int_t *setree, int_t *nnodes, int_t *treeHeads, treeList_t *treeList)
Definition: supernodal_etree.c:759
int * int32Calloc_dist(int)
Definition: memory.c:209
int Cmpfunc_U_info(const void *a, const void *b)
Definition: sec_structs.c:48
int_t getNumLookAhead(superlu_dist_options_t *)
Definition: treeFactorization.c:385
int superlu_dist_GetVersionNumber(int *, int *, int *)
Definition: superlu_dist_version.c:22
int_t Check_LRecv(MPI_Request *, int *msgcnt)
Definition: communication_aux.c:79
struct xtrsTimer_t xtrsTimer_t
int set_tag_ub()
Definition: trfAux.c:48
void C_RdTree_forwardMessageSimple(C_Tree *Tree, void *localBuffer, int msgSize)
Definition: comm_tree.c:169
int get_acc_offload()
Definition: sec_structs.c:582
void printForestWeightCost(sForest_t **sForests, SCT_t *SCT, gridinfo3d_t *grid3d)
Definition: supernodalForest.c:352
yes_no_t C_RdTree_IsRoot(C_Tree *tree)
Definition: comm_tree.c:164
int * int32Malloc_dist(int)
Definition: memory.c:202
void Destroy_CompRow_Matrix_dist(SuperMatrix *)
Definition: util.c:53
void PrintInt32(char *, int, int *)
Definition: util.c:614
int_t LDiagBlockRecvWait(int_t k, int_t *factored_U, MPI_Request *, gridinfo_t *)
Definition: communication_aux.c:218
float symbfact_dist(int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, superlu_dist_mem_usage_t *)
Definition: psymbfact.c:142
void getSCUweight(int_t nsupers, treeList_t *treeList, int_t *xsup, int_t **Lrowind_bc_ptr, int_t **Ufstnz_br_ptr, gridinfo3d_t *grid3d)
Definition: trfAux.c:1205
treeList_t * setree2list(int_t nsuper, int_t *setree)
Definition: supernodal_etree.c:71
int_t Wait_UDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:152
int_t checkIntVector3d(int_t *vec, int_t len, gridinfo3d_t *grid3d)
Definition: util.c:1219
int_t ** getTreePermFr(int_t *myTreeIdxs, sForest_t **sForests, gridinfo3d_t *grid3d)
Definition: supernodalForest.c:290
void print_memorylog(SuperLUStat_t *, char *)
Definition: util.c:793
int64_t fixupL_dist(const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: util.c:158
void printTRStimer(xtrsTimer_t *xtrsTimer, gridinfo3d_t *grid3d)
int_t * getSubTreeRoots(int_t k, treeList_t *treeList)
Definition: supernodal_etree.c:378
void SCT_print(gridinfo_t *grid, SCT_t *SCT)
Definition: sec_structs.c:433
void superlu_gridmap(MPI_Comm, int, int, int[], int, gridinfo_t *)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid.c:87
static const int RD_U
Definition: superlu_defs.h:230
int_t calcTreeWeight(int_t nsupers, int_t *setree, treeList_t *treeList, int_t *xsup)
Definition: supernodal_etree.c:183
int get_mpi_process_per_gpu()
Definition: util.c:1528
int_t * getGridTrees(gridinfo3d_t *grid3d)
Definition: supernodal_etree.c:802
int_t * topological_ordering(int_t nsuper, int_t *setree)
Definition: supernodal_etree.c:54
int_t * calculate_num_children(int_t nsuper, int_t *setree)
Definition: supernodal_etree.c:958
int * getIsNodeInMyGrid(int_t nsupers, int_t maxLvl, int_t *myNodeCount, int_t **treePerm)
Definition: supernodalForest.c:305
void pxerr_dist(char *, gridinfo_t *, int_t)
Definition: pxerr_dist.c:27
int_t * getPermNodeList(int_t nnode, int_t *nlist, int_t *perm_c_sup, int_t *iperm_c_sup)
Definition: supernodal_etree.c:317
int_t * getMyNodeCounts(int_t maxLvl, int_t *myTreeIdxs, int_t *gNodeCount)
Definition: util.c:1208
int * getLastDepBtree(int_t nsupers, treeList_t *treeList)
int freeCommRequestsArr(int_t mxLeafNode, commRequests_t **comReqss)
Definition: treeFactorization.c:257
int getNumThreads(int)
Definition: trfAux.c:61
int_t psymbfact_LUXpandMem(int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:91
int file_PrintInt32(FILE *, char *, int, int *)
Definition: util.c:643
int_t testListPerm(int_t nodeCount, int_t *nodeList, int_t *permList, int_t *gTopLevel)
Definition: supernodal_etree.c:436
int_t * getMyIperm(int_t nnodes, int_t nsupers, int_t *myPerm)
Definition: supernodal_etree.c:835
int sort_U_info(Ublock_info_t *Ublock_info, int n)
Definition: sec_structs.c:61
int file_PrintLong10(FILE *, char *, int_t, int_t *)
void PrintInt10(char *, int_t, int_t *)
Definition: util.c:600
treeTopoInfo_t getMyTreeTopoInfo(int_t nnodes, int_t nsupers, int_t *myPerm, int_t *setree)
Definition: supernodal_etree.c:913
void superlu_gridexit(gridinfo_t *)
Definition: superlu_grid.c:200
int_t printFileList(char *sname, int_t nnodes, int_t *dlist, int_t *setree)
Definition: supernodal_etree.c:241
int_t initMsgs(msgs_t *msgs)
Definition: treeFactorization.c:336
int int_t
Definition: superlu_defs.h:114
void DistPrint3D(char *function_name, double value, char *Units, gridinfo3d_t *grid3d)
Definition: sec_structs.c:341
int_t symbfact_SubInit(fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *)
Definition: memory.c:304
int_t psymbfact_LUXpand(int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:213
void SCT_printComm3D(gridinfo3d_t *grid3d, SCT_t *SCT)
Definition: sec_structs.c:563
void C_RdTree_Nullify(C_Tree *tree)
Definition: comm_tree.c:148
void C_BcTree_waitSendRequest(C_Tree *tree)
Definition: comm_tree.c:90
void initTRStimer(xtrsTimer_t *xtrsTimer, gridinfo_t *grid)
void SCT_print3D(gridinfo3d_t *grid3d, SCT_t *SCT)
Definition: sec_structs.c:508
void * superlu_malloc_dist(size_t)
Definition: memory.c:163
void isort1(int_t N, int_t *ARRAY)
Definition: util.c:738
int_t * TreePostorder_dist(int_t, int_t *)
Definition: etree.c:393
int_t getCommonAncsCount(int_t k, treeList_t *treeList)
Definition: supernodal_etree.c:278
int_t psymbfact_prLUXpand(int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *)
Definition: psymbfact_util.c:502
commRequests_t ** initCommRequestsArr(int_t mxLeafNode, int_t ldt, gridinfo_t *grid)
Definition: treeFactorization.c:243
int_t initFactNodelists(int_t, int_t, int_t, factNodelists_t *)
Definition: treeFactorization.c:312
void Destroy_SuperMatrix_Store_dist(SuperMatrix *)
Deallocate the structure pointing to the actual storage of the matrix.
Definition: util.c:29
void Print_EtreeLevelBoundry(int_t *Etree_LvlBdry, int_t max_level, int_t nsuper)
Definition: supernodal_etree.c:974
void quickSortM(int_t *, int_t, int_t, int_t, int_t, int_t)
Definition: util.c:1109
static const int BC_U
Definition: superlu_defs.h:229
int_t Wait_LSend(int_t k, gridinfo_t *grid, int **ToSendR, MPI_Request *s, SCT_t *)
Definition: communication_aux.c:32
int_t get_max_buffer_size()
Definition: util.c:815
void C_RdTree_Create(C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision)
Definition: comm_tree.c:100
int get_thread_per_process()
Definition: util.c:804
int_t * intCalloc_dist(int_t)
Definition: memory.c:226
double CPU_CLOCK_RATE
Definition: sec_structs.c:39
int Cmpfunc_R_info(const void *a, const void *b)
Definition: sec_structs.c:41
int_t * Etree_LevelBoundry(int_t *perm, int_t *tsort_etree, int_t nsuper)
Definition: supernodal_etree.c:927
void set_default_options_dist(superlu_dist_options_t *)
Set the default values for the options argument.
Definition: util.c:198
int genmmd_dist_(int_t *, int_t *, int_t *a, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *)
Definition: mmd.c:64
void isort(int_t N, int_t *ARRAY1, int_t *ARRAY2)
Definition: util.c:687
int_t * getGlobal_iperm(int_t nsupers, int_t nperms, int_t **perms, int_t *nnodes)
Definition: supernodal_etree.c:679
void quickSort(int_t *, int_t, int_t, int_t)
Definition: util.c:1044
msgs_t ** initMsgsArr(int_t numLA)
Definition: treeFactorization.c:343
void print_etree_leveled(int_t *setree, int_t *tsort_etree, int_t nsuper)
Definition: supernodal_etree.c:986
int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *)
Definition: util.c:658
int_t Test_LDiagBlock_Recv(MPI_Request *, SCT_t *)
Definition: communication_aux.c:182
void sp_colorder(superlu_dist_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *)
Definition: sp_colorder.c:81
void check_repfnz_dist(int_t, int_t, int_t, int_t *)
Check whether repfnz[] == EMPTY after reset.
Definition: util.c:586
void superlu_gridinit(MPI_Comm, int, int, gridinfo_t *)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid.c:37
sForest_t ** getGreedyLoadBalForests(int_t maxLvl, int_t nsupers, int_t *setree, treeList_t *treeList)
Definition: supernodalForest.c:794
int_t estimate_bigu_size(int_t, int_t **, Glu_persist_t *, gridinfo_t *, int_t *, int_t *)
Definition: util.c:991
void Destroy_CompRowLoc_Matrix_dist(SuperMatrix *)
Definition: util.c:44
int freeFactStat(factStat_t *factStat)
Definition: treeFactorization.c:299
int getnGPUStreams()
Definition: util.c:1512
int_t * supernodal_etree(int_t nsuper, int_t *etree, int_t *supno, int_t *xsup)
Definition: supernodal_etree.c:32
static const int BC_L
Definition: superlu_defs.h:227
void bcast_tree(void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *)
Definition: comm.c:72
static const int RD_L
Definition: superlu_defs.h:228
trans_t
Definition: superlu_enum_consts.h:34
milu_t
Definition: superlu_enum_consts.h:46
LU_space_t
Definition: superlu_enum_consts.h:40
DiagScale_t
Definition: superlu_enum_consts.h:35
IterRefine_t
Definition: superlu_enum_consts.h:36
rowperm_t
Definition: superlu_enum_consts.h:31
colperm_t
Definition: superlu_enum_consts.h:32
MemType
Definition: superlu_enum_consts.h:38
fact_t
Definition: superlu_enum_consts.h:30
norm_t
Definition: superlu_enum_consts.h:41
yes_no_t
Definition: superlu_enum_consts.h:29
PhaseType
Definition: superlu_enum_consts.h:66
#define MAX_3D_LEVEL
Definition: util_dist.h:77
int_t get_gpublas_nb()
Definition: util.c:826
int_t get_num_gpu_streams()
Definition: util.c:837
void gemm_division_cpu_gpu(superlu_dist_options_t *options, int *num_streams_used, int *stream_end_col, int *ncpu_blks, int nbrow, int ldu, int nstreams, int *full_u_cols, int num_blks, int_t gemmBufferSize)
Definition: util.c:1330
integer, parameter, public usub
Definition: superlupara.f90:35
Definitions for parallel symbolic factorization routine.
Definition: superlu_defs.h:1142
int tag_
Definition: superlu_defs.h:1150
int msgSize_
Definition: superlu_defs.h:1149
MPI_Comm comm_
Definition: superlu_defs.h:1144
int destCnt_
Definition: superlu_defs.h:1146
MPI_Datatype type_
Definition: superlu_defs.h:1152
yes_no_t empty_
Definition: superlu_defs.h:1151
int myRank_
Definition: superlu_defs.h:1148
int myRoot_
Definition: superlu_defs.h:1145
Definition: superlu_defs.h:490
int_t nzumax
Definition: superlu_defs.h:496
LU_space_t MemModel
Definition: superlu_defs.h:497
int_t nzlmax
Definition: superlu_defs.h:495
int64_t nnzLU
Definition: superlu_defs.h:500
int_t * usub
Definition: superlu_defs.h:493
int_t * lsub
Definition: superlu_defs.h:491
int_t * xusub
Definition: superlu_defs.h:494
int_t * xlsub
Definition: superlu_defs.h:492
Definition: superlu_defs.h:435
int_t * xsup
Definition: superlu_defs.h:436
int_t * supno
Definition: superlu_defs.h:437
Definition: psymbfact.h:106
Definition: psymbfact.h:57
Definition: superlu_defs.h:770
int_t lptr
Definition: superlu_defs.h:771
int_t StRow
Definition: superlu_defs.h:776
int_t eo
Definition: superlu_defs.h:773
int_t nrows
Definition: superlu_defs.h:774
int_t ib
Definition: superlu_defs.h:772
int_t FullRow
Definition: superlu_defs.h:775
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: supermatrix.h:54
Definition: superlu_defs.h:760
int_t eo
Definition: superlu_defs.h:765
int_t jb
Definition: superlu_defs.h:763
int_t StCol
Definition: superlu_defs.h:767
int_t iukp
Definition: superlu_defs.h:762
int_t full_u_cols
Definition: superlu_defs.h:764
int_t ncols
Definition: superlu_defs.h:766
int_t rukp
Definition: superlu_defs.h:761
Definition: superlu_defs.h:752
int_t lbnum
Definition: superlu_defs.h:753
int_t indpos
Definition: superlu_defs.h:754
Definition: superlu_defs.h:924
MPI_Request * send_req
Definition: superlu_defs.h:932
MPI_Request * recv_req
Definition: superlu_defs.h:930
MPI_Request * L_diag_blk_recv_req
Definition: superlu_defs.h:926
MPI_Request * recv_requ
Definition: superlu_defs.h:931
MPI_Request * U_diag_blk_send_req
Definition: superlu_defs.h:929
MPI_Request * L_diag_blk_send_req
Definition: superlu_defs.h:927
MPI_Request * send_requ
Definition: superlu_defs.h:933
MPI_Request * U_diag_blk_recv_req
Definition: superlu_defs.h:928
Definition: superlu_defs.h:852
int_t ksup_size
Definition: superlu_defs.h:865
int_t copyU_kljb
Definition: superlu_defs.h:858
int_t next_col
Definition: superlu_defs.h:853
int_t kijb
Definition: superlu_defs.h:856
int_t * kindexL
Definition: superlu_defs.h:861
int_t next_k
Definition: superlu_defs.h:854
int_t mkrow
Definition: superlu_defs.h:863
int_t copyL_kljb
Definition: superlu_defs.h:857
int_t u_copy_len
Definition: superlu_defs.h:860
int_t kljb
Definition: superlu_defs.h:855
int_t * kindexU
Definition: superlu_defs.h:862
int_t mkcol
Definition: superlu_defs.h:864
int_t l_copy_len
Definition: superlu_defs.h:859
Definition: superlu_defs.h:780
int id
Definition: superlu_defs.h:781
void * next
Definition: superlu_defs.h:782
Definition: superlu_defs.h:937
int_t * perm_u
Definition: superlu_defs.h:940
int * indirect
Definition: superlu_defs.h:941
int * indirect2
Definition: superlu_defs.h:942
int_t * iperm_c_supno
Definition: superlu_defs.h:938
int_t * iperm_u
Definition: superlu_defs.h:939
Definition: superlu_defs.h:839
int_t * IrecvPlcd_D
Definition: superlu_defs.h:844
int_t * factored
Definition: superlu_defs.h:840
int_t * factored_U
Definition: superlu_defs.h:843
int_t * factored_D
Definition: superlu_defs.h:841
int_t * IbcastPanel_U
Definition: superlu_defs.h:846
int_t * numChildLeft
Definition: superlu_defs.h:847
int_t * IbcastPanel_L
Definition: superlu_defs.h:845
int_t * factored_L
Definition: superlu_defs.h:842
int_t * gpuLUreduced
Definition: superlu_defs.h:848
Definition: superlu_defs.h:890
int_t * setree
Definition: superlu_defs.h:891
int_t * numChildLeft
Definition: superlu_defs.h:892
Definition: superlu_defs.h:398
int_t npdep
Definition: superlu_defs.h:407
int_t nprow
Definition: superlu_defs.h:405
gridinfo_t grid2d
Definition: superlu_defs.h:403
superlu_scope_t zscp
Definition: superlu_defs.h:402
superlu_scope_t rscp
Definition: superlu_defs.h:400
int iam
Definition: superlu_defs.h:404
int_t npcol
Definition: superlu_defs.h:406
MPI_Comm comm
Definition: superlu_defs.h:399
int rankorder
Definition: superlu_defs.h:408
superlu_scope_t cscp
Definition: superlu_defs.h:401
Definition: superlu_defs.h:388
int_t nprow
Definition: superlu_defs.h:393
int_t npcol
Definition: superlu_defs.h:394
superlu_scope_t cscp
Definition: superlu_defs.h:391
superlu_scope_t rscp
Definition: superlu_defs.h:390
MPI_Comm comm
Definition: superlu_defs.h:389
int iam
Definition: superlu_defs.h:392
Definition: superlu_defs.h:815
int_t lptrj
Definition: superlu_defs.h:817
int_t lib
Definition: superlu_defs.h:818
int_t luptrj
Definition: superlu_defs.h:816
Definition: superlu_defs.h:822
int_t iuip
Definition: superlu_defs.h:823
int_t ruip
Definition: superlu_defs.h:824
int_t ljb
Definition: superlu_defs.h:825
Definition: superlu_defs.h:947
int * msgcnt
Definition: superlu_defs.h:948
int * msgcntU
Definition: superlu_defs.h:949
Definition: superlu_defs.h:833
int_t * perm_c_supno
Definition: superlu_defs.h:834
int_t * iperm_c_supno
Definition: superlu_defs.h:835
Statistics collected during parallel symbolic factorization.
Definition: psymbfact.h:194
Definition: superlu_defs.h:551
int * disp_ibuf
Definition: superlu_defs.h:559
int_t * recv_ibuf2
Definition: superlu_defs.h:565
int_t b2x
Definition: superlu_defs.h:563
void * send_dbuf
Definition: superlu_defs.h:561
int * ptr_to_dbuf
Definition: superlu_defs.h:554
int_t * send_ibuf2
Definition: superlu_defs.h:564
void * recv_dbuf2
Definition: superlu_defs.h:567
int * B_to_X_SendCnt
Definition: superlu_defs.h:552
int * X_to_B_vSendCnt
Definition: superlu_defs.h:558
int_t * send_ibuf
Definition: superlu_defs.h:560
int * X_to_B_SendCnt
Definition: superlu_defs.h:553
int * X_to_B_iSendCnt
Definition: superlu_defs.h:557
void * send_dbuf2
Definition: superlu_defs.h:566
Definition: superlu_defs.h:901
treeTopoInfo_t topoInfo
Definition: superlu_defs.h:911
int_t numLvl
Definition: superlu_defs.h:909
double weight
Definition: superlu_defs.h:918
int_t * nodeList
Definition: superlu_defs.h:904
int_t numTrees
Definition: superlu_defs.h:910
double cost
Definition: superlu_defs.h:919
int_t nNodes
Definition: superlu_defs.h:903
Definition: superlu_defs.h:744
int expansions
Definition: superlu_defs.h:747
float for_lu
Definition: superlu_defs.h:745
float total
Definition: superlu_defs.h:746
int64_t nnzL
Definition: superlu_defs.h:748
Definition: superlu_defs.h:712
yes_no_t PrintStat
Definition: superlu_defs.h:735
yes_no_t SymmetricMode
Definition: superlu_defs.h:720
double ILU_DropTol
Definition: superlu_defs.h:725
yes_no_t RefineInitialized
Definition: superlu_defs.h:734
yes_no_t Algo3d
Definition: superlu_defs.h:741
yes_no_t PivotGrowth
Definition: superlu_defs.h:721
double DiagPivotThresh
Definition: superlu_defs.h:719
yes_no_t Equil
Definition: superlu_defs.h:714
norm_t ILU_Norm
Definition: superlu_defs.h:727
yes_no_t lookahead_etree
Definition: superlu_defs.h:738
int num_lookaheads
Definition: superlu_defs.h:737
yes_no_t ConditionNumber
Definition: superlu_defs.h:722
trans_t Trans
Definition: superlu_defs.h:717
yes_no_t SymPattern
Definition: superlu_defs.h:740
milu_t ILU_MILU
Definition: superlu_defs.h:729
IterRefine_t IterRefine
Definition: superlu_defs.h:718
double ILU_MILU_Dim
Definition: superlu_defs.h:730
yes_no_t SolveInitialized
Definition: superlu_defs.h:733
fact_t Fact
Definition: superlu_defs.h:713
yes_no_t DiagInv
Definition: superlu_defs.h:715
double ILU_FillTol
Definition: superlu_defs.h:728
double ILU_FillFactor
Definition: superlu_defs.h:726
colperm_t ColPerm
Definition: superlu_defs.h:716
int ILU_DropRule
Definition: superlu_defs.h:724
yes_no_t ParSymbFact
Definition: superlu_defs.h:731
rowperm_t RowPerm
Definition: superlu_defs.h:723
yes_no_t ReplaceTinyPivot
Definition: superlu_defs.h:732
Definition: superlu_defs.h:786
int val
Definition: superlu_defs.h:788
int ind
Definition: superlu_defs.h:787
Definition: superlu_defs.h:381
int Np
Definition: superlu_defs.h:383
MPI_Comm comm
Definition: superlu_defs.h:382
int Iam
Definition: superlu_defs.h:384
Definition: superlu_defs.h:868
int_t depth
Definition: superlu_defs.h:875
int_t * childrenList
Definition: superlu_defs.h:874
int_t left
Definition: superlu_defs.h:871
int_t right
Definition: superlu_defs.h:872
double scuWeight
Definition: superlu_defs.h:878
int_t numChild
Definition: superlu_defs.h:869
int_t extra
Definition: superlu_defs.h:873
double iWeight
Definition: superlu_defs.h:877
int_t numDescendents
Definition: superlu_defs.h:870
double weight
Definition: superlu_defs.h:876
Definition: superlu_defs.h:882
int_t numLvl
Definition: superlu_defs.h:883
int_t * eTreeTopLims
Definition: superlu_defs.h:884
int_t * myIperm
Definition: superlu_defs.h:885
Local information on vertex distribution.
Definition: psymbfact.h:140
Definition: superlu_defs.h:953
int_t trsMsgRecvZ
Definition: superlu_defs.h:974
double t_forwardSolve
Definition: superlu_defs.h:960
double tbs_comm
Definition: superlu_defs.h:965
double t_pdReDistribute_X_to_B
Definition: superlu_defs.h:958
int_t trsMsgRecvXY
Definition: superlu_defs.h:973
double tfs_comm
Definition: superlu_defs.h:962
int_t trsMsgSentZ
Definition: superlu_defs.h:972
double trsDataSendZ
Definition: superlu_defs.h:955
double trsDataRecvZ
Definition: superlu_defs.h:957
double trsDataSendXY
Definition: superlu_defs.h:954
double t_pdReDistribute_B_to_X
Definition: superlu_defs.h:959
double ppXmem
Definition: superlu_defs.h:976
double tbs_compute
Definition: superlu_defs.h:964
double tfs_tree[2 *MAX_3D_LEVEL]
Definition: superlu_defs.h:967
double tbs_tree[2 *MAX_3D_LEVEL]
Definition: superlu_defs.h:966
int_t trsMsgSentXY
Definition: superlu_defs.h:971
double t_backwardSolve
Definition: superlu_defs.h:963
double trsDataRecvXY
Definition: superlu_defs.h:956
double tfs_compute
Definition: superlu_defs.h:961
Macro definitions.
enum constants header file
Matrix type definitions.
Header for utilities.