SuperLU Distributed 9.0.0
gpu3d
superlu_ddefs.h
Go to the documentation of this file.
1
28#ifndef __SUPERLU_DDEFS /* allow multiple inclusions */
29#define __SUPERLU_DDEFS
30
31/*
32 * File name: superlu_ddefs.h
33 * Purpose: Distributed SuperLU data types and function prototypes
34 * History:
35 */
36
37#include "superlu_defs.h"
38
39/*
40 *-- The structure used to store matrix A of the linear system and
41 * several vectors describing the transformations done to matrix A.
42 *
43 * A (SuperMatrix*)
44 * Matrix A in A*X=B, of dimension (A->nrow, A->ncol).
45 * The number of linear equations is A->nrow. The type of A can be:
46 * Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
47 *
48 * DiagScale (DiagScale_t)
49 * Specifies the form of equilibration that was done.
50 * = NOEQUIL: No equilibration.
51 * = ROW: Row equilibration, i.e., A was premultiplied by diag(R).
52 * = COL: Column equilibration, i.e., A was postmultiplied by diag(C).
53 * = BOTH: Both row and column equilibration, i.e., A was replaced
54 * by diag(R)*A*diag(C).
55 *
56 * R double*, dimension (A->nrow)
57 * The row scale factors for A.
58 * If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R).
59 * If DiagScale = NOEQUIL or COL, R is not defined.
60 *
61 * C double*, dimension (A->ncol)
62 * The column scale factors for A.
63 * If DiagScale = COL or BOTH, A is multiplied on the right by diag(C).
64 * If DiagScale = NOEQUIL or ROW, C is not defined.
65 *
66 * perm_r (int*) dimension (A->nrow)
67 * Row permutation vector which defines the permutation matrix Pr,
68 * perm_r[i] = j means row i of A is in position j in Pr*A.
69 *
70 * perm_c (int*) dimension (A->ncol)
71 * Column permutation vector, which defines the
72 * permutation matrix Pc; perm_c[i] = j means column i of A is
73 * in position j in A*Pc.
74 *
75 */
76typedef struct {
78 double *R;
79 double *C;
83
84#if 0 // Sherry: move to superlu_defs.h
85/*-- Auxiliary data type used in PxGSTRS/PxGSTRS1. */
86typedef struct {
87 int_t lbnum; /* Row block number (local). */
88 int_t indpos; /* Starting position in Uindex[]. */
90#endif
91
92/*
93 * On each processor, the blocks in L are stored in compressed block
94 * column format, the blocks in U are stored in compressed block row format.
95 */
96#define MAX_LOOKAHEADS 50
97typedef struct {
98 int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc);
99 free'd in dtrs_compute_communication_structure routinies */
100 int_t *Lrowind_bc_dat; /* size sum of sizes of Lrowind_bc_ptr[lk]) */
101 long int *Lrowind_bc_offset; /* size ceil(NSUPERS/Pc) */
103
104 double **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc);
105 free'd in dtrs_compute_communication_structure routinies */
106 double *Lnzval_bc_dat; /* size sum of sizes of Lnzval_bc_ptr[lk]) */
107 long int *Lnzval_bc_offset; /* size ceil(NSUPERS/Pc) */
109
110 double **Linv_bc_ptr; /* size ceil(NSUPERS/Pc);
111 free'd in dtrs_compute_communication_structure routinies */
112 double *Linv_bc_dat; /* size sum of sizes of Linv_bc_ptr[lk]) */
113 long int *Linv_bc_offset; /* size ceil(NSUPERS/Pc) */
114 long int Linv_bc_cnt;
115
116 int_t **Lindval_loc_bc_ptr; /* size ceil(NSUPERS/Pc);
117 pointers to locations in Lrowind_bc_ptr and Lnzval_bc_ptr;
118 free'd in dtrs_compute_communication_structure routinies */
119
120 int_t *Lindval_loc_bc_dat; /* size: sum of sizes of Lindval_loc_bc_ptr[lk]) */
121 long int *Lindval_loc_bc_offset; /* size ceil(NSUPERS/Pc) */
123
124 /* for new U format -> */
125 int_t **Ucolind_bc_ptr; /* size ceil(NSUPERS/Pc) */
126 int_t *Ucolind_bc_dat; /* size: sum of sizes of Ucolind_bc_ptr[lk]) */
127 int64_t *Ucolind_bc_offset; /* size ceil(NSUPERS/Pc) */
129
130 double **Unzval_bc_ptr; /* size ceil(NSUPERS/Pc) */
131 double *Unzval_bc_dat; /* size: sum of sizes of Unzval_bc_ptr[lk]) */
132 int64_t *Unzval_bc_offset; /* size ceil(NSUPERS/Pc) */
134
135 int_t **Uindval_loc_bc_ptr; /* size ceil(NSUPERS/Pc) pointers to locations in Ucolind_bc_ptr and Unzval_bc_ptr */
136 int_t *Uindval_loc_bc_dat; /* size: sum of sizes of Uindval_loc_bc_ptr[lk]) */
137 int64_t *Uindval_loc_bc_offset; /* size ceil(NSUPERS/Pc) */
139
140 int_t **Uind_br_ptr; /* size ceil(NSUPERS/Pr) pointers to locations in Ucolind_bc_ptr for each block row */
141 int_t *Uind_br_dat; /* size: sum of sizes of Uind_br_ptr[lk]) */
142 int64_t *Uind_br_offset; /* size ceil(NSUPERS/Pr) */
143 int64_t Uind_br_cnt;
144
145 int_t **Ucolind_br_ptr; /* size ceil(NSUPERS/Pr) */
146 int_t *Ucolind_br_dat; /* size: sum of sizes of Ucolind_br_ptr[lk]) */
147 int64_t *Ucolind_br_offset; /* size ceil(NSUPERS/Pr) */
149
150 double **Unzval_br_new_ptr; /* size ceil(NSUPERS/Pr) */
151 double *Unzval_br_new_dat; /* size: sum of sizes of Unzval_br_ptr[lk]) */
152 int64_t *Unzval_br_new_offset; /* size ceil(NSUPERS/Pr) */
154
155 /* end for new U format <- */
156
157 int_t *Unnz; /* number of nonzeros per block column in U*/
158 int_t **Lrowind_bc_2_lsum; /* size ceil(NSUPERS/Pc) map indices of Lrowind_bc_ptr to indices of lsum */
159 double **Uinv_bc_ptr; /* size ceil(NSUPERS/Pc) */
160 double *Uinv_bc_dat; /* size sum of sizes of Linv_bc_ptr[lk]) */
161 long int *Uinv_bc_offset; /* size ceil(NSUPERS/Pc) */
162 long int Uinv_bc_cnt;
163
164 int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */
165 int_t *Ufstnz_br_dat; /* size sum of sizes of Ufstnz_br_ptr[lk]) */
166 long int *Ufstnz_br_offset; /* size ceil(NSUPERS/Pr) */
168
169 double **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */
170 double *Unzval_br_dat; /* size sum of sizes of Unzval_br_ptr[lk]) */
171 long int *Unzval_br_offset; /* size ceil(NSUPERS/Pr) */
173
174 /*-- Data structures used for broadcast and reduction trees. --*/
175 C_Tree *LBtree_ptr; /* size ceil(NSUPERS/Pc) */
176 C_Tree *LRtree_ptr; /* size ceil(NSUPERS/Pr) */
177 C_Tree *UBtree_ptr; /* size ceil(NSUPERS/Pc) */
178 C_Tree *URtree_ptr; /* size ceil(NSUPERS/Pr) */
179#if 0
180 int_t *Lsub_buf; /* Buffer for the remote subscripts of L */
181 double *Lval_buf; /* Buffer for the remote nonzeros of L */
182 int_t *Usub_buf; /* Buffer for the remote subscripts of U */
183 double *Uval_buf; /* Buffer for the remote nonzeros of U */
184#endif
185 int_t *Lsub_buf_2[MAX_LOOKAHEADS]; /* Buffers for the remote subscripts of L*/
186 double *Lval_buf_2[MAX_LOOKAHEADS]; /* Buffers for the remote nonzeros of L */
187 int_t *Usub_buf_2[MAX_LOOKAHEADS]; /* Buffer for the remote subscripts of U */
188 double *Uval_buf_2[MAX_LOOKAHEADS]; /* Buffer for the remote nonzeros of U */
189 double *ujrow; /* used in panel factorization. */
190 int_t bufmax[NBUFFERS]; /* Maximum buffer size across all MPI ranks:
191 * 0 : maximum size of Lsub_buf[]
192 * 1 : maximum size of Lval_buf[]
193 * 2 : maximum size of Usub_buf[]
194 * 3 : maximum size of Uval_buf[]
195 * 4 : maximum size of tempv[LDA]
196 */
197
198 /*-- Record communication schedule for factorization. --*/
199 int *ToRecv; /* Recv from no one (0), left (1), and up (2).*/
200 int *ToSendD; /* Whether need to send down block row. */
201 int **ToSendR; /* List of processes to send right block col. */
202
203 /*-- Record communication schedule for forward/back solves. --*/
204 /* 1/15/22 Sherry: changed int_t to int type */
205 int *fmod; /* Modification count for L-solve */
206 int **fsendx_plist; /* Column process list to send down Xk */
207 int *frecv; /* Modifications to be recv'd in proc row */
208 int nfrecvx; /* Number of Xk I will receive in L-solve */
209 int nfsendx; /* Number of Xk I will send in L-solve */
210 int *bmod; /* Modification count for U-solve */
211 int **bsendx_plist; /* Column process list to send down Xk */
212 int *brecv; /* Modifications to be recv'd in proc row */
213 int nbrecvx; /* Number of Xk I will receive in U-solve */
214 int nbsendx; /* Number of Xk I will send in U-solve */
215 int *mod_bit; /* Flag contribution from each row blocks */
216 int nleaf;
217 int nroot;
218 /*-- Auxiliary arrays used for forward/back solves. --*/
219 int_t *ilsum; /* Starting position of each supernode in lsum
220 (local) */
221 int_t ldalsum; /* LDA of lsum (local) */
222 int_t SolveMsgSent; /* Number of actual messages sent in LU-solve */
223 int_t SolveMsgVol; /* Volume of messages sent in the solve phase */
224 int *bcols_masked; /* Local block column IDs in my 2D grid */
225
226 /*********************/
227 /* The following variables are used in the hybrid solver */
228
229 /*-- Counts to be used in U^{-T} triangular solve. -- */
233 int_t ut_ldalsum; /* LDA of lsum (local) */
234 int_t *ut_ilsum; /* ilsum in column-wise */
235 int_t *utmod; /* Modification count for Ut-solve. */
236 int_t **ut_sendx_plist; /* Row process list to send down Xk */
237 int_t *utrecv; /* Modifications to be recev'd in proc column. */
238 int_t n_utsendx; /* Number of Xk I will receive */
239 int_t n_utrecvx; /* Number of Xk I will send */
243 Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
245 long int *Ucb_indoffset;
246 long int Ucb_indcnt;
247
248 int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */
250 long int *Ucb_valoffset;
251 long int Ucb_valcnt;
252
253 /* some additional counters for L solve */
256 int_t inv; /* whether the diagonal block is inverted*/
257 int nbcol_masked; /*number of local block columns in my 2D grid*/
258
259#ifdef GPU_ACC
260 /* The following variables are used in GPU trisolve */
261
272 int_t *d_Ucolind_br_dat; /* size: sum of sizes of Ucolind_br_ptr[lk]) */
273 int64_t *d_Ucolind_br_offset; /* size ceil(NSUPERS/Pr) */
274 double *d_Unzval_br_new_dat; /* size: sum of sizes of Unzval_br_ptr[lk]) */
275 int64_t *d_Unzval_br_new_offset; /* size ceil(NSUPERS/Pr) */
276
285 int *d_bcols_masked; /* Local block column IDs in my 2D grid */
286
287 // long int *d_Lindval_loc_bc_offset ;
288 // int_t *d_Urbs;
289 // int_t *d_Ufstnz_br_dat;
290 // long int *d_Ufstnz_br_offset;
291 // double *d_Unzval_br_dat;
292 // long int *d_Unzval_br_offset;
293 // int_t *d_Ucb_valdat;
294 // long int *d_Ucb_valoffset;
295 // Ucb_indptr_t *d_Ucb_inddat;
296 // long int *d_Ucb_indoffset;
297
305#endif
306
307} dLocalLU_t;
308
309typedef struct
310{
312 double * Lval_buf ;
314 double * Uval_buf ;
316
317typedef struct
318{
331 int maxLvl; // YL: store this to avoid the use of grid3d
332
333 /* Sherry added the following 3 for variable size batch. 2/17/23 */
334 int mxLeafNode; /* number of leaf nodes. */
335 int *diagDims; /* dimensions of the diagonal blocks at any level of the tree */
336 int *gemmCsizes; /* sizes of the C matrices at any level of the tree. */
338
339
340typedef struct {
345 char dt;
347
348
349/*-- Data structure for communication during matrix-vector multiplication. */
350typedef struct {
352 int_t *ind_tosend; /* X indeices to be sent to other processes */
353 int_t *ind_torecv; /* X indeices to be received from other processes */
354 int_t *ptr_ind_tosend;/* Printers to ind_tosend[] (Size procs)
355 (also point to val_torecv) */
356 int_t *ptr_ind_torecv;/* Printers to ind_torecv[] (Size procs)
357 (also point to val_tosend) */
358 int *SendCounts; /* Numbers of X indices to be sent
359 (also numbers of X values to be received) */
360 int *RecvCounts; /* Numbers of X indices to be received
361 (also numbers of X values to be sent) */
362 void *val_tosend; /* X values to be sent to other processes */
363 void *val_torecv; /* X values to be received from other processes */
364 int_t TotalIndSend; /* Total number of indices to be sent
365 (also total number of values to be received) */
366 int_t TotalValSend; /* Total number of values to be sent.
367 (also total number of indices to be received) */
369
370/*-- Data structure holding the information for the solution phase --*/
371typedef struct {
374 int_t num_diag_procs, *diag_procs, *diag_len;
375 pdgsmv_comm_t *gsmv_comm; /* communication metadata for SpMV,
376 required by IterRefine. */
377 pxgstrs_comm_t *gstrs_comm; /* communication metadata for SpTRSV. */
378 int_t *A_colind_gsmv; /* After pdgsmv_init(), the global column
379 indices of A are translated into the relative
380 positions in the gathered x-vector.
381 This is re-used in repeated calls to pdgsmv() */
382 int_t *xrow_to_proc; /* used by PDSLin */
383 NRformat_loc3d* A3d; /* Point to 3D {A, B} gathered on 2D layer 0.
384 This needs to be peresistent between
385 3D factorization and solve. */
386 #ifdef GPU_ACC
387 double *d_lsum, *d_lsum_save; /* used for device lsum*/
388 double *d_x; /* used for device solution vector*/
389 int *d_fmod_save, *d_fmod; /* used for device fmod vector*/
390 int *d_bmod_save, *d_bmod; /* used for device bmod vector*/
391 #endif
393
394
395
396/*==== For 3D code ====*/
397
398// new structures for pdgstrf_4_8
399
400#if 0 // Sherry: moved to superlu_defs.h
401typedef struct
402{
403 int_t nub;
404 int_t klst;
405 int_t ldu;
406 int_t* usub;
407 double* uval;
409
410typedef struct
411{
412 int_t *lsub;
413 double *lusup;
414 int_t luptr0;
415 int_t nlb; //number of l blocks
416 int_t nsupr;
418
419/* HyP_t is the data structure to assist HALO offload of Schur-complement. */
420typedef struct
421{
422 Remain_info_t *lookAhead_info, *Remain_info;
423 Ublock_info_t *Ublock_info, *Ublock_info_Phi;
424
425 int_t first_l_block_acc , first_u_block_acc;
426 int_t last_offload ;
427 int_t *Lblock_dirty_bit, * Ublock_dirty_bit;
428 double *lookAhead_L_buff, *Remain_L_buff;
429 int_t lookAheadBlk; /* number of blocks in look-ahead window */
430 int_t RemainBlk ; /* number of blocks outside look-ahead window */
431 int_t num_look_aheads, nsupers;
432 int_t ldu, ldu_Phi;
433 int_t num_u_blks, num_u_blks_Phi;
434
435 int_t jj_cpu;
436 double *bigU_Phi;
437 double *bigU_host;
438 int_t Lnbrow;
439 int_t Rnbrow;
440
441 int_t buffer_size;
442 int_t bigu_size;
443 int offloadCondition;
444 int superlu_acc_offload;
445 int nGPUStreams;
446} HyP_t;
447
448#endif // Above are moved to superlu_defs.h
449
450
452 int_t knsupc,
453 HyP_t* HyP,
454 SCT_t* SCT,
455 SuperLUStat_t *stat
456 );
457
458
459
460typedef struct
461{
462 double *bigU;
463 double *bigV;
464} dscuBufs_t;
465
466typedef struct
467{
471
472
473typedef struct dxT_struct
474{
475 double* xT;
479
480typedef struct dlsumBmod_buff_t
481{
482 double * tX; // buffer for reordered X
483 double * tU; // buffer for packedU
486
487/*=====================*/
488
489/***********************************************************************
490 * Function prototypes
491 ***********************************************************************/
492
493#ifdef __cplusplus
494extern "C" {
495#endif
496
497
498/* Supernodal LU factor related */
499extern void
502extern void
504 int_t, double *, int_t *, int_t *,
506extern void
508 double **, int_t **, int_t **);
509extern void
511 double *a, int_t *colptr, int_t *rowind,
512 double **at, int_t **rowptr, int_t **colind);
513extern int
515 SuperMatrix *);
516extern void
518extern void
521extern void
523 int_t *, int_t *, int_t *, int_t *, int_t *,
525extern void
527 double *, int_t);
528
529extern void dallocateA_dist (int_t, int_t, double **, int_t **, int_t **);
530extern void dGenXtrue_dist (int_t, int_t, double *, int_t);
531extern void dFillRHS_dist (char *, int_t, double *, int_t,
532 SuperMatrix *, double *, int_t);
533extern int dcreate_matrix(SuperMatrix *, int, double **, int *,
534 double **, int *, FILE *, gridinfo_t *);
535extern int dcreate_matrix_rb(SuperMatrix *, int, double **, int *,
536 double **, int *, FILE *, gridinfo_t *);
537extern int dcreate_matrix_dat(SuperMatrix *, int, double **, int *,
538 double **, int *, FILE *, gridinfo_t *);
539extern int dcreate_matrix_postfix(SuperMatrix *, int, double **, int *,
540 double **, int *, FILE *, char *, gridinfo_t *);
541
542extern void dScalePermstructInit(const int_t, const int_t,
545
546/* Driver related */
547extern void dgsequ_dist (SuperMatrix *, double *, double *, double *,
548 double *, double *, int *);
549extern double dlangs_dist (char *, SuperMatrix *);
550extern void dlaqgs_dist (SuperMatrix *, double *, double *, double,
551 double, double, char *);
552extern void pdgsequ (SuperMatrix *, double *, double *, double *,
553 double *, double *, int *, gridinfo_t *);
554extern double pdlangs (char *, SuperMatrix *, gridinfo_t *);
555extern void pdlaqgs (SuperMatrix *, double *, double *, double,
556 double, double, char *);
557extern int pdPermute_Dense_Matrix(int_t, int_t, int_t [], int_t[],
558 double [], int, double [], int, int,
559 gridinfo_t *);
560
561extern int sp_dtrsv_dist (char *, char *, char *, SuperMatrix *,
562 SuperMatrix *, double *, int *);
563extern int sp_dgemv_dist (char *, double, SuperMatrix *, double *,
564 int, double, double *, int);
565extern int sp_dgemm_dist (char *, int, double, SuperMatrix *,
566 double *, int, double, double *, int);
567
572 dScalePermstruct_t *, double *,
573 int, int, gridinfo_t *, dLUstruct_t *, double *,
574 SuperLUStat_t *, int *);
579 dScalePermstruct_t *ScalePermstruct,
580 Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct,
581 gridinfo_t *grid, int* supernodeMask);
582
584 dScalePermstruct_t *ScalePermstruct,
585 Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct,
586 gridinfo_t *grid, int* supernodeMask);
588 dScalePermstruct_t *, double *,
589 int, int, gridinfo_t *, dLUstruct_t *,
590 dSOLVEstruct_t *, double *, SuperLUStat_t *, int *);
597 int_t [], int_t [], gridinfo_t *grid,
600 dLUstruct_t *, dSOLVEstruct_t *, int*);
602extern void pxgstrs_finalize(pxgstrs_comm_t *);
603extern int dldperm_dist(int, int, int_t, int_t [], int_t [],
604 double [], int_t *, double [], double []);
605extern int dstatic_schedule(superlu_dist_options_t *, int, int,
607 int_t *, int_t *, int *);
608extern void dLUstructInit(const int_t, dLUstruct_t *);
609extern void dLUstructFree(dLUstruct_t *);
610extern void dDestroy_LU(int_t, gridinfo_t *, dLUstruct_t *);
611extern void dDestroy_Tree(int_t, gridinfo_t *, dLUstruct_t *);
612extern void dscatter_l (int ib, int ljb, int nsupc, int_t iukp, int_t* xsup,
613 int klst, int nbrow, int_t lptr, int temp_nbrow,
614 int_t* usub, int_t* lsub, double *tempv,
615 int* indirect_thread, int* indirect2,
616 int_t ** Lrowind_bc_ptr, double **Lnzval_bc_ptr,
617 gridinfo_t * grid);
618extern void dscatter_u (int ib, int jb, int nsupc, int_t iukp, int_t * xsup,
619 int klst, int nbrow, int_t lptr, int temp_nbrow,
620 int_t* lsub, int_t* usub, double* tempv,
621 int_t ** Ufstnz_br_ptr, double **Unzval_br_ptr,
622 gridinfo_t * grid);
623extern int_t pdgstrf(superlu_dist_options_t *, int, int, double anorm,
625
626/* #define GPU_PROF
627#define IPM_PROF */
628
629/* Solve related */
632 double *, int_t, int, SuperLUStat_t *, int *);
635 double *, int_t, int_t, int_t, int, dSOLVEstruct_t *,
636 SuperLUStat_t *, int *);
637extern void pdgstrf2_trsm(superlu_dist_options_t * options, int_t k0, int_t k,
638 double thresh, Glu_persist_t *, gridinfo_t *,
639 dLocalLU_t *, MPI_Request *, int tag_ub,
640 SuperLUStat_t *, int *info);
641extern void pdgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *,
643extern int_t pdReDistribute_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb,
644 int_t fst_row, int_t *ilsum, double *x,
647extern void dlsum_fmod(double *, double *, double *, double *,
648 int, int, int_t , int *fmod, int_t, int_t, int_t,
650 MPI_Request [], SuperLUStat_t *);
651extern void dlsum_bmod(double *, double *, double *,
652 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
653 int_t **, int_t *, gridinfo_t *, dLocalLU_t *,
654 MPI_Request [], SuperLUStat_t *);
655
656extern void dlsum_fmod_inv(double *, double *, double *, double *,
657 int, int_t , int *fmod,
659 SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int);
660extern void dlsum_fmod_inv_master(double *, double *, double *, double *,
661 int, int, int_t , int *fmod, int_t,
663 SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int);
664extern void dlsum_bmod_inv(double *, double *, double *, double *,
665 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
666 int_t **, int_t *, gridinfo_t *, dLocalLU_t *,
667 SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int);
668extern void dlsum_bmod_inv_master(double *, double *, double *, double *,
669 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
670 int_t **, int_t *, gridinfo_t *, dLocalLU_t *,
671 SuperLUStat_t **, int_t, int_t, int, int);
672
673extern void dComputeLevelsets(int , int_t , gridinfo_t *,
675
676#ifdef GPU_ACC
678
679extern void dlsum_fmod_inv_gpu_wrap(int, int, int, int, double *, double *, int, int, int_t , int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, double *, int64_t *, double *, int64_t *, int_t *, int64_t *, int_t *, int *, gridinfo_t *,
680int_t , uint64_t* ,uint64_t* ,double* ,double* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int);
681
682extern void dlsum_bmod_inv_gpu_wrap(superlu_dist_options_t *, int, int, int, int, double *, double *,int,int, int_t , int *, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *,int_t *, int64_t *, int_t *, int64_t *, double *, int64_t *, double *, int64_t *, double *, int64_t *, int_t *, int64_t *, int_t *,gridinfo_t *,
683 int_t, uint64_t*, uint64_t*, double*, double*,
684 int*, int*, int*, int*,
685 int*, int*, int*, int*, int*,
686 int*, int*, int*, int*, int*, int*,
687 int*, int*, int*, int); //int*); //int*, double*);
688
689#endif
690
692 SuperMatrix *, double, dLUstruct_t *,
694 double [], int_t, double [], int_t, int,
695 dSOLVEstruct_t *, double *, SuperLUStat_t *, int *);
696
698 SuperMatrix *, double, dLUstruct_t *,
700 dtrf3Dpartition_t* , double *, int_t, double *, int_t, int,
701 dSOLVEstruct_t *, double *, SuperLUStat_t *, int *);
702
703
705 SuperMatrix *, double, dLUstruct_t *,
706 gridinfo_t *, double *, int_t, double *, int_t,
707 int, double *, SuperLUStat_t *, int *);
709 gridinfo_t *, int_t *, int_t *[],
710 double *[], int_t *[], int_t []);
711extern int pdgsmv_AXglobal(int_t, int_t [], double [], int_t [],
712 double [], double []);
713extern int pdgsmv_AXglobal_abs(int_t, int_t [], double [], int_t [],
714 double [], double []);
715extern void pdgsmv_init(SuperMatrix *, int_t *, gridinfo_t *,
716 pdgsmv_comm_t *);
717extern void pdgsmv(int_t, SuperMatrix *, gridinfo_t *, pdgsmv_comm_t *,
718 double x[], double ax[]);
719extern void pdgsmv_finalize(pdgsmv_comm_t *);
720
721extern int_t dinitLsumBmod_buff(int_t ns, int nrhs, dlsumBmod_buff_t* lbmod_buf);
722extern int_t dleafForestBackSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, dLUstruct_t * LUstruct,
723 dScalePermstruct_t * ScalePermstruct,
724 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
725 double * x, double * lsum, double * recvbuf,
726 MPI_Request * send_req,
727 int nrhs, dlsumBmod_buff_t* lbmod_buf,
728 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
729
730extern int_t dnonLeafForestBackSolve3d( int_t treeId, dLUstruct_t * LUstruct,
731 dScalePermstruct_t * ScalePermstruct,
732 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
733 double * x, double * lsum, dxT_struct *xT_s,double * recvbuf,
734 MPI_Request * send_req,
735 int nrhs, dlsumBmod_buff_t* lbmod_buf,
736 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
737
738extern int_t dlasum_bmod_Tree(int_t pTree, int_t cTree, double *lsum, double *x,
739 dxT_struct *xT_s,
740 int nrhs, dlsumBmod_buff_t* lbmod_buf,
741 dLUstruct_t * LUstruct,
742 dtrf3Dpartition_t* trf3Dpartition,
743 gridinfo3d_t* grid3d, SuperLUStat_t * stat);
744extern int_t dlsumForestBsolve(int_t k, int_t treeId,
745 double *lsum, double *x, dxT_struct *xT_s,int nrhs, dlsumBmod_buff_t* lbmod_buf,
746 dLUstruct_t * LUstruct,
747 dtrf3Dpartition_t* trf3Dpartition,
748 gridinfo3d_t* grid3d, SuperLUStat_t * stat);
749
750extern int_t dbCastXk2Pck (int_t k, dxT_struct *xT_s, int nrhs,
751 dLUstruct_t * LUstruct, gridinfo_t * grid, xtrsTimer_t *xtrsTimer);
752
753extern int_t dlsumReducePrK (int_t k, double*x, double* lsum, double* recvbuf, int nrhs,
754 dLUstruct_t * LUstruct, gridinfo_t * grid, xtrsTimer_t *xtrsTimer);
755
756extern int_t dnonLeafForestForwardSolve3d( int_t treeId, dLUstruct_t * LUstruct,
757 dScalePermstruct_t * ScalePermstruct,
758 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
759 double * x, double * lsum,
760 dxT_struct *xT_s,
761 double * recvbuf, double* rtemp,
762 MPI_Request * send_req,
763 int nrhs,
764 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
765extern int_t dleafForestForwardSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, dLUstruct_t * LUstruct,
766 dScalePermstruct_t * ScalePermstruct,
767 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
768 double * x, double * lsum, double * recvbuf, double* rtemp,
769 MPI_Request * send_req,
770 int nrhs,
771 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
772
773
775 dScalePermstruct_t * ScalePermstruct,
776 int* supernodeMask, gridinfo_t *grid, SuperLUStat_t * stat);
777extern int_t dreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, double* x, int nrhs,
778 dtrf3Dpartition_t* trf3Dpartition, dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, double* recvbuf, xtrsTimer_t *xtrsTimer);
779
780extern void dlsum_fmod_leaf (
781 int_t treeId,
782 dtrf3Dpartition_t* trf3Dpartition,
783 double *lsum, /* Sum of local modifications. */
784 double *x, /* X array (local) */
785 double *xk, /* X[k]. */
786 double *rtemp, /* Result of full matrix-vector multiply. */
787 int nrhs, /* Number of right-hand sides. */
788 int knsupc, /* Size of supernode k. */
789 int_t k, /* The k-th component of X. */
790 int *fmod, /* Modification count for L-solve. */
791 int_t nlb, /* Number of L blocks. */
792 int_t lptr, /* Starting position in lsub[*]. */
793 int_t luptr, /* Starting position in lusup[*]. */
794 int_t *xsup,
795 gridinfo_t *grid,
796 dLocalLU_t *Llu,
797 MPI_Request send_req[], /* input/output */
798 SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer);
799
800extern void dlsum_fmod_leaf_newsolve (
801 dtrf3Dpartition_t* trf3Dpartition,
802 double *lsum, /* Sum of local modifications. */
803 double *x, /* X array (local) */
804 double *xk, /* X[k]. */
805 double *rtemp, /* Result of full matrix-vector multiply. */
806 int nrhs, /* Number of right-hand sides. */
807 int knsupc, /* Size of supernode k. */
808 int_t k, /* The k-th component of X. */
809 int *fmod, /* Modification count for L-solve. */
810 int_t nlb, /* Number of L blocks. */
811 int_t lptr, /* Starting position in lsub[*]. */
812 int_t luptr, /* Starting position in lusup[*]. */
813 int_t *xsup,
814 gridinfo_t *grid,
815 dLocalLU_t *Llu,
816 MPI_Request send_req[], /* input/output */
817 SuperLUStat_t *stat,xtrsTimer_t *xtrsTimer);
818
819
820extern void dlsum_bmod_GG
821(
822 double *lsum, /* Sum of local modifications. */
823 double *x, /* X array (local). */
824 double *xk, /* X[k]. */
825 int nrhs, /* Number of right-hand sides. */
826 dlsumBmod_buff_t* lbmod_buf,
827 int_t k, /* The k-th component of X. */
828 int *bmod, /* Modification count for L-solve. */
829 int_t *Urbs, /* Number of row blocks in each block column of U.*/
830 Ucb_indptr_t **Ucb_indptr,/* Vertical linked list pointing to Uindex[].*/
831 int_t **Ucb_valptr, /* Vertical linked list pointing to Unzval[]. */
832 int_t *xsup,
833 gridinfo_t *grid,
834 dLocalLU_t *Llu,
835 MPI_Request send_req[], /* input/output */
836 SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer);
837
838extern void dlsum_bmod_GG_newsolve (
839 dtrf3Dpartition_t* trf3Dpartition,
840 double *lsum, /* Sum of local modifications. */
841 double *x, /* X array (local). */
842 double *xk, /* X[k]. */
843 int nrhs, /* Number of right-hand sides. */
844 dlsumBmod_buff_t* lbmod_buf,
845 int_t k, /* The k-th component of X. */
846 int *bmod, /* Modification count for L-solve. */
847 int_t *Urbs, /* Number of row blocks in each block column of U.*/
848 Ucb_indptr_t **Ucb_indptr,/* Vertical linked list pointing to Uindex[].*/
849 int_t **Ucb_valptr, /* Vertical linked list pointing to Unzval[]. */
850 int_t *xsup,
851 gridinfo_t *grid,
852 dLocalLU_t *Llu,
853 MPI_Request send_req[], /* input/output */
854 SuperLUStat_t *stat
855 , xtrsTimer_t *xtrsTimer);
856
857extern int_t
858pdReDistribute3d_B_to_X (double *B, int_t m_loc, int nrhs, int_t ldb,
859 int_t fst_row, int_t * ilsum, double *x,
860 dScalePermstruct_t * ScalePermstruct,
861 Glu_persist_t * Glu_persist,
862 gridinfo3d_t * grid3d, dSOLVEstruct_t * SOLVEstruct);
863
864
865extern int_t
866pdReDistribute3d_X_to_B (int_t n, double *B, int_t m_loc, int_t ldb,
867 int_t fst_row, int nrhs, double *x, int_t * ilsum,
868 dScalePermstruct_t * ScalePermstruct,
869 Glu_persist_t * Glu_persist, gridinfo3d_t * grid3d,
870 dSOLVEstruct_t * SOLVEstruct);
871
872extern void
874 dScalePermstruct_t * ScalePermstruct,
875 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d, double *B,
876 int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
877 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, int *info);
878
879extern void
881 dScalePermstruct_t * ScalePermstruct,
882 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d, double *B,
883 int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
884 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, int *info);
885
886extern int_t pdgsTrBackSolve3d(superlu_dist_options_t *options, int_t n, dLUstruct_t * LUstruct,
887 dScalePermstruct_t * ScalePermstruct,
888 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
889 double *x3d, double *lsum3d,
890 dxT_struct *xT_s,
891 double * recvbuf,
892 MPI_Request * send_req, int nrhs,
893 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
894
896 dScalePermstruct_t * ScalePermstruct,
897 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
898 double *x3d, double *lsum3d,
899 dxT_struct *xT_s,
900 double * recvbuf,
901 MPI_Request * send_req, int nrhs,
902 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
903
905 dScalePermstruct_t * ScalePermstruct,
906 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
907 double *x3d, double *lsum3d,
908 double * recvbuf,
909 MPI_Request * send_req, int nrhs,
910 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
911
913 dtrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
914 double *x3d, double *lsum3d,
915 double * recvbuf,
916 MPI_Request * send_req, int nrhs,
917 dSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
918
920 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT );
921
922extern int_t dlocalSolveXkYk( trtype_t trtype, int_t k, double* x, int nrhs,
923 dLUstruct_t * LUstruct, gridinfo_t * grid,
924 SuperLUStat_t * stat);
925
926extern int_t diBcastXk2Pck(int_t k, double* x, int nrhs,
927 int** sendList, MPI_Request *send_req,
928 dLUstruct_t * LUstruct, gridinfo_t * grid,xtrsTimer_t *xtrsTimer);
929
930extern int_t dtrs_B_init3d(int_t nsupers, double* x, int nrhs, dLUstruct_t * LUstruct, gridinfo3d_t *grid3d);
931extern int_t dtrs_X_gather3d(double* x, int nrhs, dtrf3Dpartition_t* trf3Dpartition,
932 dLUstruct_t* LUstruct,
933 gridinfo3d_t* grid3d, xtrsTimer_t *xtrsTimer);
934extern int_t dfsolveReduceLsum3d(int_t treeId, int_t sender, int_t receiver, double* lsum, double* recvbuf, int nrhs,
935 dtrf3Dpartition_t* trf3Dpartition, dLUstruct_t* LUstruct,
936 gridinfo3d_t* grid3d,xtrsTimer_t *xtrsTimer);
937
938extern int_t dbsolve_Xt_bcast(int_t ilvl, dxT_struct *xT_s, int nrhs, dtrf3Dpartition_t* trf3Dpartition,
939 dLUstruct_t * LUstruct,gridinfo3d_t* grid3d , xtrsTimer_t *xtrsTimer);
940
941extern int_t dp2pSolvedX3d(int_t treeId, int_t sender, int_t receiver, double* x, int nrhs,
942 dtrf3Dpartition_t* trf3Dpartition, dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, xtrsTimer_t *xtrsTimer);
943
944/* Memory-related */
945extern double *doubleMalloc_dist(int_t);
946extern double *doubleCalloc_dist(int_t);
947extern void *duser_malloc_dist (int_t, int_t);
948extern void duser_free_dist (int_t, int_t);
951
952/* Auxiliary routines */
953
959extern void dZeroLblocks(int, int, gridinfo_t *, dLUstruct_t *);
960extern void dZeroUblocks(int iam, int n, gridinfo_t *, dLUstruct_t *);
961extern double dMaxAbsLij(int iam, int n, Glu_persist_t *,
963extern double dMaxAbsUij(int iam, int n, Glu_persist_t *,
965extern void dfill_dist (double *, int_t, double);
966extern void dinf_norm_error_dist (int_t, int_t, double*, int_t,
967 double*, int_t, gridinfo_t*);
968extern void pdinf_norm_error(int, int_t, int_t, double [], int_t,
969 double [], int_t , MPI_Comm);
970extern void dreadhb_dist (int, FILE *, int_t *, int_t *, int_t *,
971 double **, int_t **, int_t **);
972extern void dreadtriple_dist(FILE *, int_t *, int_t *, int_t *,
973 double **, int_t **, int_t **);
974extern void dreadtriple_noheader(FILE *, int_t *, int_t *, int_t *,
975 double **, int_t **, int_t **);
976extern void dreadrb_dist(int, FILE *, int_t *, int_t *, int_t *,
977 double **, int_t **, int_t **);
978extern void dreadMM_dist(FILE *, int_t *, int_t *, int_t *,
979 double **, int_t **, int_t **);
980extern int dread_binary(FILE *, int_t *, int_t *, int_t *,
981 double **, int_t **, int_t **);
982
984 int ldb, int nrhs, gridinfo3d_t *, int *info);
987 SuperLUStat_t *, gridinfo_t *, int *rowequ, int *colequ, int *iinfo);
989 dScalePermstruct_t *, dLUstruct_t *LUstruct, int_t m, int_t n,
991 int job, int Equil, int *rowequ, int *colequ, int *iinfo);
992extern double dcomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *);
994 int_t n, dLUstruct_t *, dScalePermstruct_t * ScalePermstruct,
995 int* supernodeMask, gridinfo_t *, SuperLUStat_t *);
996
997/* Distribute the data for numerical factorization */
1000 dLUstruct_t *, gridinfo_t *);
1001extern void pdGetDiagU(int_t, dLUstruct_t *, gridinfo_t *, double *);
1002
1004
1005/* Routines for debugging */
1006extern void dPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
1007 dLocalLU_t *);
1008extern void dPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
1009 dLocalLU_t *);
1014extern void Printdouble5(char *, int_t, double *);
1015extern int file_Printdouble5(FILE *, char *, int_t, double *);
1016
1017extern void dGenCOOLblocks(int, int_t, gridinfo_t*,
1018 Glu_persist_t*, dLocalLU_t *, int_t** , int_t** , double ** , int_t* , int_t* );
1019extern void dGenCSCLblocks(int, int_t, gridinfo_t*,
1020 Glu_persist_t*, dLocalLU_t *, double **, int_t **, int_t **, int_t*, int_t*);
1021extern void dGenCSRLblocks(int, int_t, gridinfo_t*,
1022 Glu_persist_t*, dLocalLU_t *, double **, int_t **, int_t **, int_t*, int_t*);
1023
1024/* multi-GPU */
1025#ifdef GPU_ACC
1026// extern void create_nv_buffer(int* , int*, int* , int* );
1027extern void nv_init_wrapper(MPI_Comm);
1028// extern void nv_init_wrapper(int* c, char *v[], int* omp_mpi_level);
1029extern void dprepare_multiGPU_buffers(int,int,int,int,int,int);
1031#endif
1032
1033/* BLAS */
1034
1035#ifdef USE_VENDOR_BLAS
1036extern void dgemm_(const char*, const char*, const int*, const int*, const int*,
1037 const double*, const double*, const int*, const double*,
1038 const int*, const double*, double*, const int*, int, int);
1039extern void dtrsv_(char*, char*, char*, int*, double*, int*,
1040 double*, int*, int, int, int);
1041extern void dtrsm_(const char*, const char*, const char*, const char*,
1042 const int*, const int*, const double*, const double*, const int*,
1043 double*, const int*, int, int, int, int);
1044extern void dgemv_(const char *, const int *, const int *, const double *,
1045 const double *a, const int *, const double *, const int *,
1046 const double *, double *, const int *, int);
1047
1048#else
1049extern int dgemm_(const char*, const char*, const int*, const int*, const int*,
1050 const double*, const double*, const int*, const double*,
1051 const int*, const double*, double*, const int*);
1052extern int dtrsv_(char*, char*, char*, int*, double*, int*,
1053 double*, int*);
1054extern int dtrsm_(const char*, const char*, const char*, const char*,
1055 const int*, const int*, const double*, const double*, const int*,
1056 double*, const int*);
1057extern void dgemv_(const char *, const int *, const int *, const double *,
1058 const double *a, const int *, const double *, const int *,
1059 const double *, double *, const int *);
1060#endif
1061
1062extern void dger_(const int*, const int*, const double*,
1063 const double*, const int*, const double*, const int*,
1064 double*, const int*);
1065
1066extern int dscal_(const int *n, const double *alpha, double *dx, const int *incx);
1067extern int daxpy_(const int *n, const double *alpha, const double *x,
1068 const int *incx, double *y, const int *incy);
1069
1070/* SuperLU BLAS interface: dsuperlu_blas.c */
1071extern int superlu_dgemm(const char *transa, const char *transb,
1072 int m, int n, int k, double alpha, double *a,
1073 int lda, double *b, int ldb, double beta, double *c, int ldc);
1074extern int superlu_dtrsm(const char *sideRL, const char *uplo,
1075 const char *transa, const char *diag, const int m, const int n,
1076 const double alpha, const double *a,
1077 const int lda, double *b, const int ldb);
1078extern int superlu_dger(const int m, const int n, const double alpha,
1079 const double *x, const int incx, const double *y,
1080 const int incy, double *a, const int lda);
1081extern int superlu_dscal(const int n, const double alpha, double *x, const int incx);
1082extern int superlu_daxpy(const int n, const double alpha,
1083 const double *x, const int incx, double *y, const int incy);
1084extern int superlu_dgemv(const char *trans, const int m,
1085 const int n, const double alpha, const double *a,
1086 const int lda, const double *x, const int incx,
1087 const double beta, double *y, const int incy);
1088extern int superlu_dtrsv(char *uplo, char *trans, char *diag,
1089 int n, double *a, int lda, double *x, int incx);
1090
1091#ifdef SLU_HAVE_LAPACK
1092extern void dtrtri_(char*, char*, int*, double*, int*, int*);
1093#endif
1094
1095/*==== For 3D code ====*/
1096extern int dcreate_matrix3d(SuperMatrix *A, int nrhs, double **rhs,
1097 int *ldb, double **x, int *ldx,
1098 FILE *fp, gridinfo3d_t *grid3d);
1099extern int dcreate_matrix_postfix3d(SuperMatrix *A, int nrhs, double **rhs,
1100 int *ldb, double **x, int *ldx,
1101 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
1102extern int dcreate_block_diag_3d(SuperMatrix *A, int batchCount, int nrhs, double **rhs,
1103 int *ldb, double **x, int *ldx,
1104 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
1105extern int dcreate_batch_systems(handle_t *SparseMatrix_handles, int batchCount,
1106 int nrhs, double **rhs, int *ldb, double **x, int *ldx,
1107 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
1108
1109/* Matrix distributed in NRformat_loc in 3D process grid. It converts
1110 it to a NRformat_loc distributed in 2D grid in grid-0 */
1111extern void dGatherNRformat_loc3d(fact_t Fact, NRformat_loc *A, double *B,
1112 int ldb, int nrhs, gridinfo3d_t *grid3d,
1113 NRformat_loc3d **);
1114extern void dGatherNRformat_loc3d_allgrid(fact_t Fact, NRformat_loc *A, double *B,
1115 int ldb, int nrhs, gridinfo3d_t *grid3d,
1116 NRformat_loc3d **);
1117extern int dScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d);
1118
1120 dScalePermstruct_t *, double B[], int ldb, int nrhs,
1122 double *berr, SuperLUStat_t *, int *info);
1123extern int_t pdgstrf3d(superlu_dist_options_t *, int m, int n, double anorm,
1125 gridinfo3d_t *, SuperLUStat_t *, int *);
1126extern void dInit_HyP(superlu_dist_options_t *, HyP_t* HyP, dLocalLU_t *Llu, int_t mcb, int_t mrb);
1127extern void Free_HyP(HyP_t* HyP);
1128extern int updateDirtyBit(int_t k0, HyP_t* HyP, gridinfo_t* grid);
1129
1130 /* from scatter.h */
1131extern void
1133 Remain_info_t *Remain_info, double *L_mat, int ldl,
1134 double *U_mat, int ldu, double *bigV,
1135 // int_t jj0,
1136 int_t knsupc, int_t klst,
1137 int_t *lsub, int_t *usub, int_t ldt,
1138 int_t thread_id,
1139 int *indirect, int *indirect2,
1140 int_t **Lrowind_bc_ptr, double **Lnzval_bc_ptr,
1141 int_t **Ufstnz_br_ptr, double **Unzval_br_ptr,
1142 int_t *xsup, gridinfo_t *, SuperLUStat_t *
1143#ifdef SCATTER_PROFILE
1144 , double *Host_TheadScatterMOP, double *Host_TheadScatterTimer
1145#endif
1146 );
1147
1148#ifdef _OPENMP
1149/*this version uses a lock to prevent multiple thread updating the same block*/
1150extern void
1151dblock_gemm_scatter_lock( int_t lb, int_t j, omp_lock_t* lock,
1152 Ublock_info_t *Ublock_info, Remain_info_t *Remain_info,
1153 double *L_mat, int_t ldl, double *U_mat, int_t ldu,
1154 double *bigV,
1155 // int_t jj0,
1156 int_t knsupc, int_t klst,
1157 int_t *lsub, int_t *usub, int_t ldt,
1158 int_t thread_id,
1159 int *indirect, int *indirect2,
1160 int_t **Lrowind_bc_ptr, double **Lnzval_bc_ptr,
1161 int_t **Ufstnz_br_ptr, double **Unzval_br_ptr,
1162 int_t *xsup, gridinfo_t *
1163#ifdef SCATTER_PROFILE
1164 , double *Host_TheadScatterMOP, double *Host_TheadScatterTimer
1165#endif
1166 );
1167#endif
1168
1169extern int_t
1171 int_t knsupc, int_t klst, int_t* lsub,
1172 int_t * usub, int_t ldt,
1173 int* indirect, int* indirect2,
1174 HyP_t* HyP, dLUstruct_t *, gridinfo_t*,
1175 SCT_t*SCT, SuperLUStat_t *
1176 );
1177extern int_t
1179 int_t knsupc, int_t klst, int_t* lsub,
1180 int_t * usub, int_t ldt,
1181 int* indirect, int* indirect2,
1182 HyP_t* HyP, dLUstruct_t *, gridinfo_t*,
1183 SCT_t*SCT, SuperLUStat_t * );
1184extern int_t
1186 int_t knsupc, int_t klst, int_t* lsub,
1187 int_t * usub, int_t ldt,
1188 int* indirect, int* indirect2,
1189 HyP_t* HyP, dLUstruct_t *, gridinfo_t*,
1190 SCT_t*SCT, SuperLUStat_t * );
1191extern int_t
1193 int_t knsupc, int_t klst, int_t* lsub,
1194 int_t * usub, int_t ldt,
1195 int* indirect, int* indirect2,
1196 HyP_t* HyP, dLUstruct_t *, gridinfo_t*,
1197 SCT_t*SCT, SuperLUStat_t * );
1198
1199 /* from gather.h */
1200extern void dgather_u(int_t num_u_blks,
1201 Ublock_info_t *Ublock_info, int_t * usub,
1202 double *uval, double *bigU, int_t ldu,
1203 int_t *xsup, int_t klst /* for SuperSize */
1204 );
1205
1206extern void dgather_l( int_t num_LBlk, int_t knsupc,
1207 Remain_info_t *L_info,
1208 double * lval, int_t LD_lval,
1209 double * L_buff );
1210
1211extern void dRgather_L(int_t k, int_t *lsub, double *lusup, gEtreeInfo_t*,
1213 int_t *myIperm, int_t *iperm_c_supno );
1214extern void dRgather_U(int_t k, int_t jj0, int_t *usub, double *uval,
1215 double *bigU, gEtreeInfo_t*, Glu_persist_t *,
1216 gridinfo_t *, HyP_t *, int_t *myIperm,
1217 int_t *iperm_c_supno, int_t *perm_u);
1218
1219 /* from pxdistribute3d.h */
1220extern void dbcastPermutedSparseA(SuperMatrix *A,
1221 dScalePermstruct_t *ScalePermstruct,
1222 Glu_freeable_t *Glu_freeable,
1223 dLUstruct_t *LUstruct, gridinfo3d_t *grid3d);
1224
1225extern void dnewTrfPartitionInit(int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d);
1226
1227
1228 /* from xtrf3Dpartition.h */
1230 superlu_dist_options_t *options,
1231 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1233 superlu_dist_options_t *options,
1234 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1236 superlu_dist_options_t *options,
1237 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1238extern void dDestroy_trf3Dpartition(dtrf3Dpartition_t *trf3Dpartition);
1239
1240extern void d3D_printMemUse(dtrf3Dpartition_t* trf3Dpartition,
1241 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1242
1243//extern int* getLastDep(gridinfo_t *grid, SuperLUStat_t *stat,
1244// superlu_dist_options_t *options, dLocalLU_t *Llu,
1245// int_t* xsup, int_t num_look_aheads, int_t nsupers,
1246// int_t * iperm_c_supno);
1247
1248extern void dinit3DLUstructForest( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
1249 sForest_t** sForests, dLUstruct_t* LUstruct,
1250 gridinfo3d_t* grid3d);
1251
1252extern int_t dgatherAllFactoredLUFr(int_t* myZeroTrIdxs, sForest_t* sForests,
1253 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d,
1254 SCT_t* SCT );
1255
1256 /* The following are from pdgstrf2.h */
1257extern int_t dLpanelUpdate(int_t off0, int_t nsupc, double* ublk_ptr,
1258 int_t ld_ujrow, double* lusup, int_t nsupr, SCT_t*);
1259extern void dgstrf2(int_t k, double* diagBlk, int_t LDA, double* BlockUfactor, int_t LDU,
1260 double thresh, int_t* xsup, superlu_dist_options_t *options,
1261 SuperLUStat_t *stat, int *info);
1262extern void Local_Dgstrf2(superlu_dist_options_t *options, int_t k,
1263 double thresh, double *BlockUFactor, Glu_persist_t *,
1265 SuperLUStat_t *, int *info, SCT_t*);
1266extern int_t dTrs2_GatherU(int_t iukp, int_t rukp, int_t klst,
1267 int_t nsupc, int_t ldu, int_t *usub,
1268 double* uval, double *tempv);
1269extern int_t dTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst,
1270 int_t nsupc, int_t ldu, int_t *usub,
1271 double* uval, double *tempv);
1272extern int_t dTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp,
1273 int_t *usub, double* uval, double *tempv,
1274 int_t knsupc, int nsupr, double* lusup,
1275 Glu_persist_t *Glu_persist) ;
1276extern void pdgstrs2
1277#ifdef _CRAY
1278(
1279 int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid,
1280 dLocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3
1281);
1282#else
1283(
1284 int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid,
1285 dLocalLU_t *Llu, SuperLUStat_t *stat
1286);
1287#endif
1288
1289extern void pdgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0,
1290 int_t k, double thresh, Glu_persist_t *, gridinfo_t *,
1291 dLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *);
1292
1293 /* from p3dcomm.h */
1294extern int_t dAllocLlu_3d(int_t nsupers, dLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1295extern int_t dp3dScatter(int_t n, dLUstruct_t * LUstruct, gridinfo3d_t* grid3d, int *supernodeMask);
1297 dLUstruct_t * LUstruct, gridinfo3d_t* grid3d, int *supernodeMask);
1299 dLUstruct_t * LUstruct, gridinfo3d_t* grid3d, int *supernodeMask);
1300extern int_t dcollect3dLpanels(int_t layer, int_t nsupers, dLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1301extern int_t dcollect3dUpanels(int_t layer, int_t nsupers, dLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1302extern int_t dp3dCollect(int_t layer, int_t n, dLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1303/*zero out LU non zero entries*/
1304extern int_t dzeroSetLU(int_t nnodes, int_t* nodeList , dLUstruct_t *, gridinfo3d_t*);
1305extern int dAllocGlu_3d(int_t n, int_t nsupers, dLUstruct_t *);
1306extern int dDeAllocLlu_3d(int_t n, dLUstruct_t *, gridinfo3d_t*);
1307extern int dDeAllocGlu_3d(dLUstruct_t *);
1308
1309/* Reduces L and U panels of nodes in the List nodeList (size=nnnodes)
1310receiver[L(nodelist)] =sender[L(nodelist)] +receiver[L(nodelist)]
1311receiver[U(nodelist)] =sender[U(nodelist)] +receiver[U(nodelist)]
1312*/
1314 int_t nnodes, int_t* nodeList,
1315 double* Lval_buf, double* Uval_buf,
1316 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1317/*reduces all nodelists required in a level*/
1318extern int dreduceAllAncestors3d(int_t ilvl, int_t* myNodeCount,
1319 int_t** treePerm,
1320 dLUValSubBuf_t* LUvsb,
1321 dLUstruct_t* LUstruct,
1322 gridinfo3d_t* grid3d,
1323 SCT_t* SCT );
1324/*
1325 Copies factored L and U panels from sender grid to receiver grid
1326 receiver[L(nodelist)] <-- sender[L(nodelist)];
1327 receiver[U(nodelist)] <-- sender[U(nodelist)];
1328*/
1330 int_t nnodes, int_t *nodeList, dLUValSubBuf_t* LUvsb,
1331 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d,SCT_t* SCT );
1332
1333/*Gathers all the L and U factors to grid 0 for solve stage
1334 By repeatidly calling above function*/
1336 gridinfo3d_t* grid3d, SCT_t* SCT );
1337
1338/*Distributes data in each layer and initilizes ancestors
1339 as zero in required nodes*/
1340int_t dinit3DLUstruct( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
1341 int_t* nodeCount, int_t** nodeList,
1342 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d);
1343
1345 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1346int_t dzRecvLPanel(int_t k, int_t sender, double alpha,
1347 double beta, double* Lval_buf,
1348 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1350 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1351int_t dzRecvUPanel(int_t k, int_t sender, double alpha,
1352 double beta, double* Uval_buf,
1353 dLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1354
1355 /* from communication_aux.h */
1356extern int_t dIBcast_LPanel (int_t k, int_t k0, int_t* lsub, double* lusup,
1357 gridinfo_t *, int* msgcnt, MPI_Request *,
1358 int **ToSendR, int_t *xsup, int );
1360 gridinfo_t *, int* msgcnt, int **ToSendR,
1361 int_t *xsup , SCT_t*, int);
1362extern int_t dIBcast_UPanel(int_t k, int_t k0, int_t* usub, double* uval,
1363 gridinfo_t *, int* msgcnt, MPI_Request *,
1364 int *ToSendD, int );
1365extern int_t dBcast_UPanel(int_t k, int_t k0, int_t* usub, double* uval,
1366 gridinfo_t *, int* msgcnt, int *ToSendD, SCT_t*, int);
1367extern int_t dIrecv_LPanel (int_t k, int_t k0, int_t* Lsub_buf,
1368 double* Lval_buf, gridinfo_t *,
1369 MPI_Request *, dLocalLU_t *, int);
1370extern int_t dIrecv_UPanel(int_t k, int_t k0, int_t* Usub_buf, double*,
1371 dLocalLU_t *, gridinfo_t*, MPI_Request *, int);
1372extern int_t dWait_URecv(MPI_Request *, int* msgcnt, SCT_t *);
1373extern int_t dWait_LRecv(MPI_Request*, int* msgcnt, int* msgcntsU,
1374 gridinfo_t *, SCT_t*);
1375extern int_t dISend_UDiagBlock(int_t k0, double *ublk_ptr, int_t size,
1376 MPI_Request *, gridinfo_t *, int);
1377extern int_t dRecv_UDiagBlock(int_t k0, double *ublk_ptr, int_t size,
1378 int_t src, gridinfo_t *, SCT_t*, int);
1379extern int_t dPackLBlock(int_t k, double* Dest, Glu_persist_t *,
1380 gridinfo_t *, dLocalLU_t *);
1381extern int_t dISend_LDiagBlock(int_t k0, double *lblk_ptr, int_t size,
1382 MPI_Request *, gridinfo_t *, int);
1383extern int_t dIRecv_UDiagBlock(int_t k0, double *ublk_ptr, int_t size,
1384 int_t src, MPI_Request *, gridinfo_t *,
1385 SCT_t*, int);
1386extern int_t dIRecv_LDiagBlock(int_t k0, double *L_blk_ptr, int_t size,
1387 int_t src, MPI_Request *, gridinfo_t*, SCT_t*, int);
1388extern int_t dUDiagBlockRecvWait( int_t k, int* IrecvPlcd_D, int* factored_L,
1389 MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *);
1390
1391#if (MPI_VERSION>2)
1392extern int_t dIBcast_UDiagBlock(int_t k, double *ublk_ptr, int_t size,
1393 MPI_Request *, gridinfo_t *);
1394extern int_t dIBcast_LDiagBlock(int_t k, double *lblk_ptr, int_t size,
1395 MPI_Request *, gridinfo_t *);
1396#endif
1397
1398 /* from trfCommWrapper.h */
1400 double *BlockUFactor, double *BlockLFactor,
1401 int* IrecvPlcd_D, MPI_Request *, MPI_Request *,
1402 MPI_Request *, MPI_Request *, gridinfo_t *,
1403 superlu_dist_options_t *, double thresh,
1404 dLUstruct_t *LUstruct, SuperLUStat_t *, int *info,
1405 SCT_t *, int tag_ub);
1406extern int_t dUPanelTrSolve( int_t k, double* BlockLFactor, double* bigV,
1409extern int_t dLPanelUpdate(int_t k, int* IrecvPlcd_D, int* factored_L,
1410 MPI_Request *, double* BlockUFactor, gridinfo_t *,
1411 dLUstruct_t *, SCT_t *);
1412extern int_t dUPanelUpdate(int_t k, int* factored_U, MPI_Request *,
1413 double* BlockLFactor, double* bigV,
1416extern int_t dIBcastRecvLPanel(int_t k, int_t k0, int* msgcnt,
1417 MPI_Request *, MPI_Request *,
1418 int_t* Lsub_buf, double* Lval_buf,
1419 int * factored, gridinfo_t *, dLUstruct_t *,
1420 SCT_t *, int tag_ub);
1421extern int_t dIBcastRecvUPanel(int_t k, int_t k0, int* msgcnt, MPI_Request *,
1422 MPI_Request *, int_t* Usub_buf, double* Uval_buf,
1423 gridinfo_t *, dLUstruct_t *, SCT_t *, int tag_ub);
1424extern int_t dWaitL(int_t k, int* msgcnt, int* msgcntU, MPI_Request *,
1425 MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *);
1426extern int_t dWaitU(int_t k, int* msgcnt, MPI_Request *, MPI_Request *,
1427 gridinfo_t *, dLUstruct_t *, SCT_t *);
1428extern int_t dLPanelTrSolve(int_t k, int* factored_L, double* BlockUFactor,
1429 gridinfo_t *, dLUstruct_t *);
1430
1431 /* from trfAux.h */
1432extern int getNsupers(int, Glu_persist_t *);
1433extern int_t initPackLUInfo(int_t nsupers, packLUInfo_t* packLUInfo);
1434extern int freePackLUInfo(packLUInfo_t* packLUInfo);
1437 lPanelInfo_t *, int_t*, int_t *, int_t *,
1438 double *bigU, int_t* Lsub_buf,
1439 double* Lval_buf, int_t* Usub_buf,
1440 double* Uval_buf, gridinfo_t *, dLUstruct_t *);
1444 dLUValSubBuf_t* LUvsb, gridinfo_t *,
1445 dLUstruct_t *, HyP_t*);
1446extern double* dgetBigV(int_t, int_t);
1449// permutation from superLU default
1450
1451 /* from treeFactorization.h */
1454 int_t ldt, int_t num_threads, int_t nsupers,
1456extern int dfreeScuBufs(dscuBufs_t* scuBufs);
1457
1458#if 0 // NOT CALLED
1459// the generic tree factoring code
1460extern int_t treeFactor(
1461 int_t nnnodes, // number of nodes in the tree
1462 int_t *perm_c_supno, // list of nodes in the order of factorization
1463 commRequests_t *comReqs, // lists of communication requests
1464 dscuBufs_t *scuBufs, // contains buffers for schur complement update
1465 packLUInfo_t*packLUInfo,
1466 msgs_t*msgs,
1467 dLUValSubBuf_t* LUvsb,
1468 ddiagFactBufs_t *dFBuf,
1469 factStat_t *factStat,
1470 factNodelists_t *fNlists,
1471 superlu_dist_options_t *options,
1472 int_t * gIperm_c_supno,
1473 int_t ldt,
1474 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1475 double thresh, SCT_t *SCT,
1476 int *info
1477);
1478#endif
1479
1481 int_t nnodes, // number of nodes in the tree
1482 int_t *perm_c_supno, // list of nodes in the order of factorization
1483 treeTopoInfo_t* treeTopoInfo,
1484 commRequests_t *comReqs, // lists of communication requests
1485 dscuBufs_t *scuBufs, // contains buffers for schur complement update
1486 packLUInfo_t*packLUInfo,
1487 msgs_t*msgs,
1488 dLUValSubBuf_t* LUvsb,
1489 ddiagFactBufs_t *dFBuf,
1490 factStat_t *factStat,
1491 factNodelists_t *fNlists,
1492 superlu_dist_options_t *options,
1493 int_t * gIperm_c_supno,
1494 int_t ldt,
1495 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1496 double thresh, SCT_t *SCT,
1497 int *info
1498);
1499
1501 int_t nnnodes, // number of nodes in the tree
1502 int_t *perm_c_supno, // list of nodes in the order of factorization
1503 commRequests_t *comReqs, // lists of communication requests
1504 dscuBufs_t *scuBufs, // contains buffers for schur complement update
1505 packLUInfo_t*packLUInfo,
1506 msgs_t*msgs,
1507 dLUValSubBuf_t* LUvsb,
1508 ddiagFactBufs_t *dFBuf,
1509 factStat_t *factStat,
1510 factNodelists_t *fNlists,
1511 superlu_dist_options_t *options,
1512 int_t * gIperm_c_supno,
1513 int_t ldt,
1514 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1515 double thresh, SCT_t *SCT, int tag_ub,
1516 int *info
1517);
1518
1520 sForest_t* sforest,
1521 commRequests_t **comReqss, // lists of communication requests // size maxEtree level
1522 dscuBufs_t *scuBufs, // contains buffers for schur complement update
1523 packLUInfo_t*packLUInfo,
1524 msgs_t**msgss, // size=num Look ahead
1525 dLUValSubBuf_t** LUvsbs, // size=num Look ahead
1526 ddiagFactBufs_t **dFBufs, // size maxEtree level
1527 factStat_t *factStat,
1528 factNodelists_t *fNlists,
1529 gEtreeInfo_t* gEtreeInfo, // global etree info
1530 superlu_dist_options_t *options,
1531 int_t * gIperm_c_supno,
1532 int_t ldt,
1533 HyP_t* HyP,
1534 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1535 double thresh, SCT_t *SCT, int tag_ub,
1536 int *info
1537);
1539extern int dLluBufFreeArr(int_t numLA, dLUValSubBuf_t **LUvsbs);
1540extern ddiagFactBufs_t** dinitDiagFactBufsArr(int mxLeafNode, int ldt, gridinfo_t* grid);
1541extern ddiagFactBufs_t** dinitDiagFactBufsArrMod(int mxLeafNode, int* ldts, gridinfo_t* grid);
1542extern int dfreeDiagFactBufsArr(int mxLeafNode, ddiagFactBufs_t** dFBufs);
1543extern int dinitDiagFactBufs(int ldt, ddiagFactBufs_t* dFBuf);
1544extern int_t checkRecvUDiag(int_t k, commRequests_t *comReqs,
1545 gridinfo_t *grid, SCT_t *SCT);
1546extern int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *);
1547
1548
1549extern int pdflatten_LDATA(superlu_dist_options_t *options, int_t n, dLUstruct_t * LUstruct,
1550 gridinfo_t *grid, SuperLUStat_t * stat);
1552 dLUstruct_t *, SuperLUStat_t *, int n);
1554 dLUstruct_t *, SuperLUStat_t *, int n);
1555
1556extern int_t
1558 Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno,
1559 gridinfo_t *grid, int_t *colptr[], int_t *rowind[],
1560 double *a[]);
1561extern float
1563 dScalePermstruct_t *ScalePermstruct,
1564 Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct,
1565 gridinfo3d_t *grid3d);
1566
1567
1568#if 0 // NOT CALLED
1569/* from ancFactorization.h (not called) */
1570extern int_t ancestorFactor(
1571 int_t ilvl, // level of factorization
1572 sForest_t* sforest,
1573 commRequests_t **comReqss, // lists of communication requests // size maxEtree level
1574 dscuBufs_t *scuBufs, // contains buffers for schur complement update
1575 packLUInfo_t*packLUInfo,
1576 msgs_t**msgss, // size=num Look ahead
1577 dLUValSubBuf_t** LUvsbs, // size=num Look ahead
1578 ddiagFactBufs_t **dFBufs, // size maxEtree level
1579 factStat_t *factStat,
1580 factNodelists_t *fNlists,
1581 gEtreeInfo_t* gEtreeInfo, // global etree info
1582 superlu_dist_options_t *options,
1583 int_t * gIperm_c_supno,
1584 int_t ldt,
1585 HyP_t* HyP,
1586 dLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1587 double thresh, SCT_t *SCT, int tag_ub, int *info
1588);
1589#endif
1590
1591 /* Batch interface */
1592extern int pdgssvx3d_csc_batch(
1593 superlu_dist_options_t *, int batchCount, int m, int n, int nnz,
1594 int nrhs, handle_t *, double **RHSptr, int *ldRHS,
1595 double **ReqPtr, double **CeqPtr,
1596 int **RpivPtr, int **CpivPtr, DiagScale_t *DiagScale,
1597 handle_t *F, double **Xptr, int *ldX, double **Berrs,
1598 gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info
1599 //DeviceContext context /* device context including queues, events, dependencies */
1600 );
1601extern int dequil_batch(
1602 superlu_dist_options_t *, int batchCount, int m, int n, handle_t *,
1603 double **ReqPtr, double **CeqPtr, DiagScale_t *
1604 // DeviceContext context /* device context including queues, events, dependencies */
1605 );
1606extern int dpivot_batch(
1607 superlu_dist_options_t *, int batchCount, int m, int n, handle_t *,
1608 double **ReqPtr, double **CeqPtr, DiagScale_t *, int **RpivPtr
1609 // DeviceContext context /* device context including queues, events, dependencies */
1610 );
1611
1612/*== end 3D prototypes ===================*/
1613
1614extern double *dready_x;
1615extern double *dready_lsum;
1616
1617#ifdef __cplusplus
1618 }
1619#endif
1620
1621#endif /* __SUPERLU_dDEFS */
1622
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public trans
Definition: superlupara.f90:35
integer, parameter, public factored
Definition: superlupara.f90:35
integer, parameter, public lusup
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:1256
Definition: superlu_defs.h:506
Definition: superlu_defs.h:451
Definition: superlu_defs.h:854
Definition: supermatrix.h:194
Definition: supermatrix.h:176
Definition: psymbfact.h:57
Definition: superlu_defs.h:799
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: supermatrix.h:54
Definition: superlu_defs.h:789
Definition: superlu_defs.h:781
Definition: superlu_defs.h:1012
Definition: superlu_ddefs.h:310
int_t * Usub_buf
Definition: superlu_ddefs.h:313
int_t * Lsub_buf
Definition: superlu_ddefs.h:311
double * Lval_buf
Definition: superlu_ddefs.h:312
double * Uval_buf
Definition: superlu_ddefs.h:314
Definition: superlu_ddefs.h:340
dLocalLU_t * Llu
Definition: superlu_ddefs.h:343
dtrf3Dpartition_t * trf3Dpart
Definition: superlu_ddefs.h:344
Glu_persist_t * Glu_persist
Definition: superlu_ddefs.h:342
int_t * etree
Definition: superlu_ddefs.h:341
char dt
Definition: superlu_ddefs.h:345
Definition: superlu_ddefs.h:97
double * Lnzval_bc_dat
Definition: superlu_ddefs.h:106
Ucb_indptr_t ** Ucb_indptr
Definition: superlu_ddefs.h:243
int64_t * d_Unzval_br_new_offset
Definition: superlu_ddefs.h:275
int64_t * Unzval_bc_offset
Definition: superlu_ddefs.h:132
int_t * Ucb_valdat
Definition: superlu_ddefs.h:249
int_t * Ucolind_bc_dat
Definition: superlu_ddefs.h:126
long int Ucb_indcnt
Definition: superlu_ddefs.h:246
int * ToSendD
Definition: superlu_ddefs.h:200
long int * Ufstnz_br_offset
Definition: superlu_ddefs.h:166
int64_t * d_Uindval_loc_bc_offset
Definition: superlu_ddefs.h:284
int64_t Uindval_loc_bc_cnt
Definition: superlu_ddefs.h:138
long int Unzval_br_cnt
Definition: superlu_ddefs.h:172
C_Tree * d_LBtree_ptr
Definition: superlu_ddefs.h:300
int_t ** Lindval_loc_bc_ptr
Definition: superlu_ddefs.h:116
long int Uinv_bc_cnt
Definition: superlu_ddefs.h:162
int * bmod
Definition: superlu_ddefs.h:210
int_t ldalsum
Definition: superlu_ddefs.h:221
long int Lindval_loc_bc_cnt
Definition: superlu_ddefs.h:122
int_t * Ucolind_br_dat
Definition: superlu_ddefs.h:146
double * ujrow
Definition: superlu_ddefs.h:189
int_t * d_ilsum
Definition: superlu_ddefs.h:298
int * fmod
Definition: superlu_ddefs.h:205
int_t FRECV
Definition: superlu_ddefs.h:232
long int Ucb_valcnt
Definition: superlu_ddefs.h:251
long int * Lnzval_bc_offset
Definition: superlu_ddefs.h:107
double * Uinv_bc_dat
Definition: superlu_ddefs.h:160
int_t * d_Uind_br_dat
Definition: superlu_ddefs.h:268
double * d_Uinv_bc_dat
Definition: superlu_ddefs.h:278
int * d_bcols_masked
Definition: superlu_ddefs.h:285
int_t n
Definition: superlu_ddefs.h:254
int_t * ut_ilsum
Definition: superlu_ddefs.h:234
long int * d_Lrowind_bc_offset
Definition: superlu_ddefs.h:263
int nfrecvx
Definition: superlu_ddefs.h:208
long int Linv_bc_cnt
Definition: superlu_ddefs.h:114
int_t ** ut_sendx_plist
Definition: superlu_ddefs.h:236
int ** fsendx_plist
Definition: superlu_ddefs.h:206
Ucb_indptr_t * Ucb_inddat
Definition: superlu_ddefs.h:244
int_t * d_Ucolind_br_dat
Definition: superlu_ddefs.h:272
int_t * Unnz
Definition: superlu_ddefs.h:157
double ** Unzval_br_ptr
Definition: superlu_ddefs.h:169
int64_t Unzval_bc_cnt
Definition: superlu_ddefs.h:133
long int Lnzval_bc_cnt
Definition: superlu_ddefs.h:108
int64_t * Unzval_br_new_offset
Definition: superlu_ddefs.h:152
int_t inv
Definition: superlu_ddefs.h:256
int_t ** Ucolind_bc_ptr
Definition: superlu_ddefs.h:125
double ** Uinv_bc_ptr
Definition: superlu_ddefs.h:159
int nbrecvx
Definition: superlu_ddefs.h:213
int nleaf
Definition: superlu_ddefs.h:216
int_t ** Ucb_valptr
Definition: superlu_ddefs.h:248
long int * Unzval_br_offset
Definition: superlu_ddefs.h:171
int_t nfrecvmod
Definition: superlu_ddefs.h:255
double * d_Unzval_bc_dat
Definition: superlu_ddefs.h:270
long int * Linv_bc_offset
Definition: superlu_ddefs.h:113
int64_t * Uind_br_offset
Definition: superlu_ddefs.h:142
int_t * ut_modbit
Definition: superlu_ddefs.h:241
C_Tree * LRtree_ptr
Definition: superlu_ddefs.h:176
int nfsendx
Definition: superlu_ddefs.h:209
double * Linv_bc_dat
Definition: superlu_ddefs.h:112
double ** Linv_bc_ptr
Definition: superlu_ddefs.h:110
int64_t * Ucolind_bc_offset
Definition: superlu_ddefs.h:127
int_t ** Lrowind_bc_ptr
Definition: superlu_ddefs.h:98
long int * d_Linv_bc_offset
Definition: superlu_ddefs.h:279
int64_t * Uindval_loc_bc_offset
Definition: superlu_ddefs.h:137
int_t * utmod
Definition: superlu_ddefs.h:235
long int * Lindval_loc_bc_offset
Definition: superlu_ddefs.h:121
long int * Uinv_bc_offset
Definition: superlu_ddefs.h:161
double * d_Unzval_br_new_dat
Definition: superlu_ddefs.h:274
C_Tree * LBtree_ptr
Definition: superlu_ddefs.h:175
int_t SolveMsgVol
Definition: superlu_ddefs.h:223
int64_t * d_Uind_br_offset
Definition: superlu_ddefs.h:269
int_t * Urbs
Definition: superlu_ddefs.h:242
int64_t Ucolind_bc_cnt
Definition: superlu_ddefs.h:128
double ** Unzval_bc_ptr
Definition: superlu_ddefs.h:130
int_t n_utrecvmod
Definition: superlu_ddefs.h:240
double * Unzval_br_new_dat
Definition: superlu_ddefs.h:151
long int Lrowind_bc_cnt
Definition: superlu_ddefs.h:102
double * d_Lnzval_bc_dat
Definition: superlu_ddefs.h:264
int * ToRecv
Definition: superlu_ddefs.h:199
long int Ufstnz_br_cnt
Definition: superlu_ddefs.h:167
int nbsendx
Definition: superlu_ddefs.h:214
int_t * ilsum
Definition: superlu_ddefs.h:219
int_t * d_Lrowind_bc_dat
Definition: superlu_ddefs.h:262
int_t * Uind_br_dat
Definition: superlu_ddefs.h:141
double ** Unzval_br_new_ptr
Definition: superlu_ddefs.h:150
C_Tree * URtree_ptr
Definition: superlu_ddefs.h:178
int_t ** Uindval_loc_bc_ptr
Definition: superlu_ddefs.h:135
int nroot
Definition: superlu_ddefs.h:217
int_t * Ufstnz_br_dat
Definition: superlu_ddefs.h:165
C_Tree * d_URtree_ptr
Definition: superlu_ddefs.h:303
int64_t * d_Ucolind_br_offset
Definition: superlu_ddefs.h:273
int_t ut_ldalsum
Definition: superlu_ddefs.h:233
int_t UT_SOLVE
Definition: superlu_ddefs.h:230
double ** Lnzval_bc_ptr
Definition: superlu_ddefs.h:104
int nbcol_masked
Definition: superlu_ddefs.h:257
int64_t * d_Lindval_loc_bc_offset
Definition: superlu_ddefs.h:282
double * Unzval_bc_dat
Definition: superlu_ddefs.h:131
long int * Ucb_indoffset
Definition: superlu_ddefs.h:245
C_Tree * UBtree_ptr
Definition: superlu_ddefs.h:177
int_t * Uindval_loc_bc_dat
Definition: superlu_ddefs.h:136
int * frecv
Definition: superlu_ddefs.h:207
int_t * Lindval_loc_bc_dat
Definition: superlu_ddefs.h:120
int64_t Unzval_br_new_cnt
Definition: superlu_ddefs.h:153
long int * Lrowind_bc_offset
Definition: superlu_ddefs.h:101
long int * d_Uinv_bc_offset
Definition: superlu_ddefs.h:280
int_t * d_Lindval_loc_bc_dat
Definition: superlu_ddefs.h:281
int_t n_utrecvx
Definition: superlu_ddefs.h:239
int_t * d_Uindval_loc_bc_dat
Definition: superlu_ddefs.h:283
long int * Ucb_valoffset
Definition: superlu_ddefs.h:250
double * d_Linv_bc_dat
Definition: superlu_ddefs.h:277
int_t * utrecv
Definition: superlu_ddefs.h:237
long int * d_Lnzval_bc_offset
Definition: superlu_ddefs.h:265
int_t ** Ufstnz_br_ptr
Definition: superlu_ddefs.h:164
int * brecv
Definition: superlu_ddefs.h:212
int_t SolveMsgSent
Definition: superlu_ddefs.h:222
int_t ** Uind_br_ptr
Definition: superlu_ddefs.h:140
C_Tree * d_LRtree_ptr
Definition: superlu_ddefs.h:301
int * mod_bit
Definition: superlu_ddefs.h:215
int_t n_utsendx
Definition: superlu_ddefs.h:238
int_t * d_Ucolind_bc_dat
Definition: superlu_ddefs.h:266
int_t ** Lrowind_bc_2_lsum
Definition: superlu_ddefs.h:158
int_t * Lrowind_bc_dat
Definition: superlu_ddefs.h:100
int * bcols_masked
Definition: superlu_ddefs.h:224
int_t * d_xsup
Definition: superlu_ddefs.h:299
C_Tree * d_UBtree_ptr
Definition: superlu_ddefs.h:302
int64_t * Ucolind_br_offset
Definition: superlu_ddefs.h:147
int64_t Ucolind_br_cnt
Definition: superlu_ddefs.h:148
double * Unzval_br_dat
Definition: superlu_ddefs.h:170
gridinfo_t * d_grid
Definition: superlu_ddefs.h:304
int_t ** Ucolind_br_ptr
Definition: superlu_ddefs.h:145
long int * d_Unzval_bc_offset
Definition: superlu_ddefs.h:271
int ** bsendx_plist
Definition: superlu_ddefs.h:211
int_t L_SOLVE
Definition: superlu_ddefs.h:231
int64_t Uind_br_cnt
Definition: superlu_ddefs.h:143
int64_t * d_Ucolind_bc_offset
Definition: superlu_ddefs.h:267
int ** ToSendR
Definition: superlu_ddefs.h:201
Definition: superlu_ddefs.h:371
pdgsmv_comm_t * gsmv_comm
Definition: superlu_ddefs.h:375
pxgstrs_comm_t * gstrs_comm
Definition: superlu_ddefs.h:377
int_t * inv_perm_c
Definition: superlu_ddefs.h:373
int * d_fmod
Definition: superlu_ddefs.h:389
double * d_lsum
Definition: superlu_ddefs.h:387
NRformat_loc3d * A3d
Definition: superlu_ddefs.h:383
int_t * xrow_to_proc
Definition: superlu_ddefs.h:382
int * d_bmod
Definition: superlu_ddefs.h:390
double * d_x
Definition: superlu_ddefs.h:388
int_t * A_colind_gsmv
Definition: superlu_ddefs.h:378
int_t * row_to_proc
Definition: superlu_ddefs.h:372
int_t * diag_len
Definition: superlu_ddefs.h:374
Definition: superlu_ddefs.h:76
double * R
Definition: superlu_ddefs.h:78
int_t * perm_c
Definition: superlu_ddefs.h:81
int_t * perm_r
Definition: superlu_ddefs.h:80
DiagScale_t DiagScale
Definition: superlu_ddefs.h:77
double * C
Definition: superlu_ddefs.h:79
Definition: superlu_ddefs.h:467
double * BlockLFactor
Definition: superlu_ddefs.h:468
double * BlockUFactor
Definition: superlu_ddefs.h:469
Definition: superlu_ddefs.h:481
double * tX
Definition: superlu_ddefs.h:482
int_t * indCols
Definition: superlu_ddefs.h:484
double * tU
Definition: superlu_ddefs.h:483
Definition: superlu_ddefs.h:461
double * bigU
Definition: superlu_ddefs.h:462
double * bigV
Definition: superlu_ddefs.h:463
Definition: superlu_ddefs.h:318
int_t nsupers
Definition: superlu_ddefs.h:319
int_t * myNodeCount
Definition: superlu_ddefs.h:322
SupernodeToGridMap_t * superGridMap
Definition: superlu_ddefs.h:330
sForest_t ** sForests
Definition: superlu_ddefs.h:326
int_t * supernode2treeMap
Definition: superlu_ddefs.h:327
int_t * myZeroTrIdxs
Definition: superlu_ddefs.h:324
int_t ** treePerm
Definition: superlu_ddefs.h:325
dLUValSubBuf_t * LUvsb
Definition: superlu_ddefs.h:329
gEtreeInfo_t gEtreeInfo
Definition: superlu_ddefs.h:320
int maxLvl
Definition: superlu_ddefs.h:331
int mxLeafNode
Definition: superlu_ddefs.h:334
int_t * myTreeIdxs
Definition: superlu_ddefs.h:323
int * supernodeMask
Definition: superlu_ddefs.h:328
int_t * iperm_c_supno
Definition: superlu_ddefs.h:321
int * diagDims
Definition: superlu_ddefs.h:335
int * gemmCsizes
Definition: superlu_ddefs.h:336
Definition: superlu_ddefs.h:474
double * xT
Definition: superlu_ddefs.h:475
int_t * ilsumT
Definition: superlu_ddefs.h:477
int_t ldaspaT
Definition: superlu_ddefs.h:476
Definition: superlu_defs.h:1025
Definition: superlu_defs.h:927
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:834
Definition: superlu_defs.h:1034
Definition: superlu_defs.h:844
Definition: superlu_ddefs.h:350
int_t TotalIndSend
Definition: superlu_ddefs.h:364
void * val_torecv
Definition: superlu_ddefs.h:363
int * RecvCounts
Definition: superlu_ddefs.h:360
int_t * ind_torecv
Definition: superlu_ddefs.h:353
int_t * ptr_ind_tosend
Definition: superlu_ddefs.h:354
int_t TotalValSend
Definition: superlu_ddefs.h:366
int_t * ind_tosend
Definition: superlu_ddefs.h:352
int_t * extern_start
Definition: superlu_ddefs.h:351
int * SendCounts
Definition: superlu_ddefs.h:358
int_t * ptr_ind_torecv
Definition: superlu_ddefs.h:356
void * val_tosend
Definition: superlu_ddefs.h:362
Definition: superlu_defs.h:567
Definition: superlu_defs.h:989
Definition: superlu_defs.h:773
Definition: superlu_defs.h:728
Definition: superlu_defs.h:970
Definition: superlu_defs.h:825
Definition: superlu_defs.h:1040
int superlu_dger(const int m, const int n, const double alpha, const double *x, const int incx, const double *y, const int incy, double *a, const int lda)
void dlsum_fmod_inv_gpu_wrap(int, int, int, int, double *, double *, int, int, int_t, int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, double *, int64_t *, double *, int64_t *, int_t *, int64_t *, int_t *, int *, gridinfo_t *, int_t, uint64_t *, uint64_t *, double *, double *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int)
int_t dLluBufInit(dLUValSubBuf_t *, dLUstruct_t *)
int dstatic_schedule(superlu_dist_options_t *, int, int, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int_t *, int_t *, int *)
Definition: dstatic_schedule.c:46
void dprepare_multiGPU_buffers(int, int, int, int, int, int)
void dSolveFinalize(superlu_dist_options_t *, dSOLVEstruct_t *)
Release the resources used for the solution phase.
Definition: pdutil.c:1196
void dgstrf2(int_t k, double *diagBlk, int_t LDA, double *BlockUfactor, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
Definition: pdgstrf2.c:404
void dScaleAddId_CompRowLoc_Matrix_dist(SuperMatrix *, double)
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,...
Definition: dutil_dist.c:395
int_t initPackLUInfo(int_t nsupers, packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:168
void dScaleAdd_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *, double)
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,...
Definition: dutil_dist.c:420
void dscatter_l(int ib, int ljb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *usub, int_t *lsub, double *tempv, int *indirect_thread, int *indirect2, int_t **Lrowind_bc_ptr, double **Lnzval_bc_ptr, gridinfo_t *grid)
int dDeAllocLlu_3d(int_t n, dLUstruct_t *, gridinfo3d_t *)
Definition: dutil_dist.c:489
int_t pdReDistribute_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, double *x, dScalePermstruct_t *, Glu_persist_t *, gridinfo_t *, dSOLVEstruct_t *)
Definition: pdgstrs.c:344
int_t dPackLBlock(int_t k, double *Dest, Glu_persist_t *, gridinfo_t *, dLocalLU_t *)
double dcomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *)
This function computes the norm of a matrix A.
Definition: dssvx3dAux.c:564
int updateDirtyBit(int_t k0, HyP_t *HyP, gridinfo_t *grid)
Definition: sec_structs.c:651
int_t dgatherAllFactoredLU(dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
float pddistribute_allgrid(superlu_dist_options_t *options, int_t n, SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask)
Definition: pddistribute.c:2331
void pdGetDiagU(int_t, dLUstruct_t *, gridinfo_t *, double *)
Definition: pdGetDiagU.c:66
int superlu_daxpy(const int n, const double alpha, const double *x, const int incx, double *y, const int incy)
int dcreate_matrix(SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *)
Definition: dcreate_matrix.c:349
int_t dQuerySpace_dist(int_t, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, superlu_dist_mem_usage_t *)
Definition: dmemory_dist.c:73
int_t dlocalSolveXkYk(trtype_t trtype, int_t k, double *x, int nrhs, dLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: pdgstrs3d.c:6133
void dlsum_fmod_leaf(int_t treeId, dtrf3Dpartition_t *trf3Dpartition, double *lsum, double *x, double *xk, double *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:2180
int dcreate_matrix3d(SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo3d_t *grid3d)
void dGenCSCLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, double **, int_t **, int_t **, int_t *, int_t *)
Definition: dutil_dist.c:1062
void dZeroUblocks(int iam, int n, gridinfo_t *, dLUstruct_t *)
Sets all entries of matrix U to zero.
Definition: dutil_dist.c:1410
double * dgetBigU(superlu_dist_options_t *, int_t, gridinfo_t *, dLUstruct_t *)
void pdgsmv_init(SuperMatrix *, int_t *, gridinfo_t *, pdgsmv_comm_t *)
Definition: pdgsmv.c:27
void pdgstrs3d_newsolve(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info)
Definition: pdgstrs3d.c:6936
int_t dISend_UDiagBlock(int_t k0, double *ublk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
int dfreeScuBufs(dscuBufs_t *scuBufs)
void pdgsrfs(superlu_dist_options_t *, int_t, SuperMatrix *, double, dLUstruct_t *, dScalePermstruct_t *, gridinfo_t *, double[], int_t, double[], int_t, int, dSOLVEstruct_t *, double *, SuperLUStat_t *, int *)
dtrf3Dpartition_t * dinitTrf3Dpartition_allgrid(int_t n, superlu_dist_options_t *options, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void pdgsmv(int_t, SuperMatrix *, gridinfo_t *, pdgsmv_comm_t *, double x[], double ax[])
Definition: pdgsmv.c:235
int_t dreduceAncestors3d(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, double *Lval_buf, double *Uval_buf, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void dGenCSRLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, double **, int_t **, int_t **, int_t *, int_t *)
Definition: dutil_dist.c:1215
int_t dTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp, int_t *usub, double *uval, double *tempv, int_t knsupc, int nsupr, double *lusup, Glu_persist_t *Glu_persist)
Definition: pdgstrf2.c:804
int_t dcollect3dUpanels(int_t layer, int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t dIRecv_UDiagBlock(int_t k0, double *ublk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
struct dlsumBmod_buff_t dlsumBmod_buff_t
int_t ddenseTreeFactor(int_t nnnodes, int_t *perm_c_supno, commRequests_t *comReqs, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, dLUValSubBuf_t *LUvsb, ddiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
void dinit3DLUstructForest(int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t **sForests, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t scuStatUpdate(int_t knsupc, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *stat)
Definition: sec_structs.c:668
int pdgsmv_AXglobal_abs(int_t, int_t[], double[], int_t[], double[], double[])
Definition: pdgsmv_AXglobal.c:285
void dreadrb_dist(int, FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **)
Definition: dreadrb.c:275
void pdCompute_Diag_Inv(int_t, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: pdgstrs.c:842
void pdconvert_flatten_skyline2UROWDATA(superlu_dist_options_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, int n)
Definition: pdgssvx.c:2330
void dperform_row_permutation(superlu_dist_options_t *, fact_t Fact, dScalePermstruct_t *, dLUstruct_t *LUstruct, int_t m, int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, int job, int Equil, int *rowequ, int *colequ, int *iinfo)
Definition: dssvx3dAux.c:466
int dscal_(const int *n, const double *alpha, double *dx, const int *incx)
ddiagFactBufs_t ** dinitDiagFactBufsArrMod(int mxLeafNode, int *ldts, gridinfo_t *grid)
int_t dWaitL(int_t k, int *msgcnt, int *msgcntU, MPI_Request *, MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *)
double dMaxAbsUij(int iam, int n, Glu_persist_t *, dLUstruct_t *, gridinfo_t *)
Find max(abs(U(i,j)))
Definition: dutil_dist.c:682
int dtrsm_(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *)
void dGenXtrue_dist(int_t, int_t, double *, int_t)
Definition: dutil_dist.c:525
void dgather_l(int_t num_LBlk, int_t knsupc, Remain_info_t *L_info, double *lval, int_t LD_lval, double *L_buff)
int sp_dgemv_dist(char *, double, SuperMatrix *, double *, int, double, double *, int)
SpGEMV.
Definition: dsp_blas2_dist.c:394
int_t dIBcastRecvLPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Lsub_buf, double *Lval_buf, int *factored, gridinfo_t *, dLUstruct_t *, SCT_t *, int tag_ub)
float pddistribute(superlu_dist_options_t *, int_t, SuperMatrix *, dScalePermstruct_t *, Glu_freeable_t *, dLUstruct_t *, gridinfo_t *)
Definition: pddistribute.c:329
int_t dDiagFactIBCast(int_t k, int_t k0, double *BlockUFactor, double *BlockLFactor, int *IrecvPlcd_D, MPI_Request *, MPI_Request *, MPI_Request *, MPI_Request *, gridinfo_t *, superlu_dist_options_t *, double thresh, dLUstruct_t *LUstruct, SuperLUStat_t *, int *info, SCT_t *, int tag_ub)
int_t dzRecvLPanel(int_t k, int_t sender, double alpha, double beta, double *Lval_buf, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
float pddistribute_allgrid_index_only(superlu_dist_options_t *options, int_t n, SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask)
Definition: pddistribute.c:3423
int_t dIBcast_UPanel(int_t k, int_t k0, int_t *usub, double *uval, gridinfo_t *, int *msgcnt, MPI_Request *, int *ToSendD, int)
int getNsupers(int, Glu_persist_t *)
Definition: trfAux.c:42
double pdlangs(char *, SuperMatrix *, gridinfo_t *)
Definition: pdlangs.c:65
int daxpy_(const int *n, const double *alpha, const double *x, const int *incx, double *y, const int *incy)
void dGenCOOLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, int_t **, int_t **, double **, int_t *, int_t *)
Definition: dutil_dist.c:955
int_t dSchurComplementSetup(int_t k, int *msgcnt, Ublock_info_t *, Remain_info_t *, uPanelInfo_t *, lPanelInfo_t *, int_t *, int_t *, int_t *, double *bigU, int_t *Lsub_buf, double *Lval_buf, int_t *Usub_buf, double *Uval_buf, gridinfo_t *, dLUstruct_t *)
void dDestroy_A3d_gathered_on_2d(dSOLVEstruct_t *, gridinfo3d_t *)
Definition: pdutil.c:1244
ddiagFactBufs_t ** dinitDiagFactBufsArr(int mxLeafNode, int ldt, gridinfo_t *grid)
int dtrs_compute_communication_structure(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, int *supernodeMask, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: pdgstrs3d.c:153
int_t dzSendLPanel(int_t k, int_t receiver, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void dCreate_CompRowLoc_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, int_t, int_t, double *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
void Free_HyP(HyP_t *HyP)
Definition: sec_structs.c:627
void dlsum_bmod_inv(double *, double *, double *, double *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int)
Definition: pdgstrs_lsum.c:1364
int_t pdgsTrBackSolve3d_newsolve(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:7692
int_t dblock_gemm_scatterTopLeft(int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int_t dUDiagBlockRecvWait(int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *)
void dlsum_fmod(double *, double *, double *, double *, int, int, int_t, int *fmod, int_t, int_t, int_t, int_t *, gridinfo_t *, dLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: pdgstrs_lsum.c:62
int_t dIRecv_LDiagBlock(int_t k0, double *L_blk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
void dScalePermstructFree(dScalePermstruct_t *)
Deallocate ScalePermstruct.
Definition: dutil_dist.c:448
void dgsequ_dist(SuperMatrix *, double *, double *, double *, double *, double *, int *)
Definition: dgsequ_dist.c:92
int_t dzSendUPanel(int_t k, int_t receiver, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int pdPermute_Dense_Matrix(int_t, int_t, int_t[], int_t[], double[], int, double[], int, int, gridinfo_t *)
Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-t...
Definition: pdutil.c:297
void dRgather_U(int_t k, int_t jj0, int_t *usub, double *uval, double *bigU, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno, int_t *perm_u)
void pdgstrs3d(superlu_dist_options_t *, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info)
Definition: pdgstrs3d.c:6608
void dlsum_bmod_inv_master(double *, double *, double *, double *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t, int_t, int, int)
Definition: pdgstrs_lsum.c:1835
int_t checkRecvUDiag(int_t k, commRequests_t *comReqs, gridinfo_t *grid, SCT_t *SCT)
Definition: treeFactorization.c:205
int pdgsmv_AXglobal(int_t, int_t[], double[], int_t[], double[], double[])
Definition: pdgsmv_AXglobal.c:259
int_t dAllocLlu_3d(int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void pdgstrs2(int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, dLocalLU_t *Llu, SuperLUStat_t *stat)
void dgemv_(const char *, const int *, const int *, const double *, const double *a, const int *, const double *, const int *, const double *, double *, const int *)
int_t pdgstrs_delete_device_lsum_x(dSOLVEstruct_t *)
Definition: pdutil.c:1067
void pdgsequ(SuperMatrix *, double *, double *, double *, double *, double *, int *, gridinfo_t *)
Definition: pdgsequ.c:86
void validateInput_pdgssvx3d(superlu_dist_options_t *, SuperMatrix *A, int ldb, int nrhs, gridinfo3d_t *, int *info)
Validates the input parameters for a given problem.
Definition: dssvx3dAux.c:24
void dinf_norm_error_dist(int_t, int_t, double *, int_t, double *, int_t, gridinfo_t *)
Check the inf-norm of the error vector.
Definition: dutil_dist.c:593
int pdgsmv_AXglobal_setup(SuperMatrix *, Glu_persist_t *, gridinfo_t *, int_t *, int_t *[], double *[], int_t *[], int_t[])
int_t dUPanelUpdate(int_t k, int *factored_U, MPI_Request *, double *BlockLFactor, double *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, SCT_t *)
void dGatherNRformat_loc3d(fact_t Fact, NRformat_loc *A, double *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
int dfreeDiagFactBufsArr(int mxLeafNode, ddiagFactBufs_t **dFBufs)
int_t dcollect3dLpanels(int_t layer, int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t dscatter3dUPanels(int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
int_t dlasum_bmod_Tree(int_t pTree, int_t cTree, double *lsum, double *x, dxT_struct *xT_s, int nrhs, dlsumBmod_buff_t *lbmod_buf, dLUstruct_t *LUstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat)
Definition: pdgstrs3d.c:3892
void dtrtri_(char *, char *, int *, double *, int *, int *)
void d3D_printMemUse(dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: dmemory_dist.c:243
int dcreate_matrix_postfix3d(SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: dcreate_matrix3d.c:72
int_t dbsolve_Xt_bcast(int_t ilvl, dxT_struct *xT_s, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1720
void dreadMM_dist(FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **)
Definition: dreadMM.c:38
int_t dnonLeafForestForwardSolve3d(int_t treeId, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, dxT_struct *xT_s, double *recvbuf, double *rtemp, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1900
float pddistribute3d_Yang(superlu_dist_options_t *options, int_t n, SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: pddistribute3d.c:1357
int_t dinitLsumBmod_buff(int_t ns, int nrhs, dlsumBmod_buff_t *lbmod_buf)
Definition: pdgstrs3d.c:3925
void dlsum_fmod_inv_master(double *, double *, double *, double *, int, int, int_t, int *fmod, int_t, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int)
Definition: pdgstrs_lsum.c:963
int freePackLUInfo(packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:177
void dDestroy_Tree(int_t, gridinfo_t *, dLUstruct_t *)
Destroy broadcast and reduction trees used in triangular solve.
Definition: pdutil.c:1312
void dblock_gemm_scatter(int_t lb, int_t j, Ublock_info_t *Ublock_info, Remain_info_t *Remain_info, double *L_mat, int ldl, double *U_mat, int ldu, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int_t thread_id, int *indirect, int *indirect2, int_t **Lrowind_bc_ptr, double **Lnzval_bc_ptr, int_t **Ufstnz_br_ptr, double **Unzval_br_ptr, int_t *xsup, gridinfo_t *, SuperLUStat_t *)
struct dxT_struct dxT_struct
void nv_init_wrapper(MPI_Comm)
int pdgssvx3d_csc_batch(superlu_dist_options_t *, int batchCount, int m, int n, int nnz, int nrhs, handle_t *, double **RHSptr, int *ldRHS, double **ReqPtr, double **CeqPtr, int **RpivPtr, int **CpivPtr, DiagScale_t *DiagScale, handle_t *F, double **Xptr, int *ldX, double **Berrs, gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info)
Solve a batch of linear systems Ai * Xi = Bi with direct method, computing the LU factorization of ea...
Definition: pdgssvx3d_csc_batch.c:81
int pdflatten_LDATA(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: pdgssvx.c:2520
void dDestroy_LU(int_t, gridinfo_t *, dLUstruct_t *)
Destroy distributed L & U matrices.
Definition: pdutil.c:442
void pdgsrfs_ABXglobal(superlu_dist_options_t *, int_t, SuperMatrix *, double, dLUstruct_t *, gridinfo_t *, double *, int_t, double *, int_t, int, double *, SuperLUStat_t *, int *)
Definition: pdgsrfs_ABXglobal.c:125
void pdgssvx3d(superlu_dist_options_t *, SuperMatrix *, dScalePermstruct_t *, double B[], int ldb, int nrhs, gridinfo3d_t *, dLUstruct_t *, dSOLVEstruct_t *, double *berr, SuperLUStat_t *, int *info)
Definition: pdgssvx3d.c:710
int_t dWait_LRecv(MPI_Request *, int *msgcnt, int *msgcntsU, gridinfo_t *, SCT_t *)
void dCopy_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
Definition: dutil_dist.c:363
float ddist_psymbtonum(superlu_dist_options_t *, int_t, SuperMatrix *, dScalePermstruct_t *, Pslu_freeable_t *, dLUstruct_t *, gridinfo_t *)
Definition: pdsymbfact_distdata.c:1219
void dCreate_CompCol_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, double *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
void ddelete_multiGPU_buffers()
int_t dgatherAllFactoredLUFr(int_t *myZeroTrIdxs, sForest_t *sForests, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void pdgstrs_Bglobal(superlu_dist_options_t *, int_t, dLUstruct_t *, gridinfo_t *, double *, int_t, int, SuperLUStat_t *, int *)
Definition: pdgstrs_Bglobal.c:108
int_t dTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, double *uval, double *tempv)
Definition: pdgstrf2.c:782
int_t dsparseTreeFactor(int_t nnodes, int_t *perm_c_supno, treeTopoInfo_t *treeTopoInfo, commRequests_t *comReqs, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, dLUValSubBuf_t *LUvsb, ddiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info)
void dPrint_Dense_Matrix_dist(SuperMatrix *)
int_t dblock_gemm_scatterTopRight(int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
void dRgather_L(int_t k, int_t *lsub, double *lusup, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno)
int_t dReDistribute_A(SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno, gridinfo_t *grid, int_t *colptr[], int_t *rowind[], double *a[])
Definition: pddistribute.c:67
int_t dLPanelUpdate(int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, double *BlockUFactor, gridinfo_t *, dLUstruct_t *, SCT_t *)
void dlsum_fmod_leaf_newsolve(dtrf3Dpartition_t *trf3Dpartition, double *lsum, double *x, double *xk, double *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:3704
int_t dblock_gemm_scatterBottomLeft(int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
void dlsum_bmod_inv_gpu_wrap(superlu_dist_options_t *, int, int, int, int, double *, double *, int, int, int_t, int *, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, int_t *, int64_t *, int_t *, int64_t *, double *, int64_t *, double *, int64_t *, double *, int64_t *, int_t *, int64_t *, int_t *, gridinfo_t *, int_t, uint64_t *, uint64_t *, double *, double *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int)
int dpivot_batch(superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, double **ReqPtr, double **CeqPtr, DiagScale_t *, int **RpivPtr)
Compute row pivotings for each matrix, for numerical stability.
Definition: dpivot_batch.c:45
int dScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d)
void dgather_u(int_t num_u_blks, Ublock_info_t *Ublock_info, int_t *usub, double *uval, double *bigU, int_t ldu, int_t *xsup, int_t klst)
int file_dPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A)
int_t dIBcastRecvUPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Usub_buf, double *Uval_buf, gridinfo_t *, dLUstruct_t *, SCT_t *, int tag_ub)
void dZeroLblocks(int, int, gridinfo_t *, dLUstruct_t *)
Sets all entries of matrix L to zero.
Definition: dutil_dist.c:779
int_t dLPanelTrSolve(int_t k, int *factored_L, double *BlockUFactor, gridinfo_t *, dLUstruct_t *)
int sp_dtrsv_dist(char *, char *, char *, SuperMatrix *, SuperMatrix *, double *, int *)
Definition: dsp_blas2_dist.c:95
#define MAX_LOOKAHEADS
Definition: superlu_ddefs.h:96
int dldperm_dist(int, int, int_t, int_t[], int_t[], double[], int_t *, double[], double[])
Definition: dldperm_dist.c:96
int_t dLpanelUpdate(int_t off0, int_t nsupc, double *ublk_ptr, int_t ld_ujrow, double *lusup, int_t nsupr, SCT_t *)
void * duser_malloc_dist(int_t, int_t)
Definition: dmemory_dist.c:30
void Local_Dgstrf2(superlu_dist_options_t *options, int_t k, double thresh, double *BlockUFactor, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t *, int *info, SCT_t *)
Definition: pdgstrf2.c:402
void dscaleMatrixDiagonally(fact_t Fact, dScalePermstruct_t *, SuperMatrix *, SuperLUStat_t *, gridinfo_t *, int *rowequ, int *colequ, int *iinfo)
Definition: dssvx3dAux.c:162
int_t dWait_URecv(MPI_Request *, int *msgcnt, SCT_t *)
void dLUstructFree(dLUstruct_t *)
Deallocate LUstruct.
Definition: pdutil.c:422
int dPrint_CompRowLoc_Matrix_dist(SuperMatrix *)
void dCreate_SuperNode_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, double *, int_t *, int_t *, int_t *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
void pdgssvx_ABglobal(superlu_dist_options_t *, SuperMatrix *, dScalePermstruct_t *, double *, int, int, gridinfo_t *, dLUstruct_t *, double *, SuperLUStat_t *, int *)
void dPrint_CompCol_Matrix_dist(SuperMatrix *)
int dtrsv_(char *, char *, char *, int *, double *, int *, double *, int *)
int_t dnonLeafForestBackSolve3d(int_t treeId, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, dxT_struct *xT_s, double *recvbuf, MPI_Request *send_req, int nrhs, dlsumBmod_buff_t *lbmod_buf, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:4190
int_t dIrecv_LPanel(int_t k, int_t k0, int_t *Lsub_buf, double *Lval_buf, gridinfo_t *, MPI_Request *, dLocalLU_t *, int)
void pdgstrf2_trsm(superlu_dist_options_t *options, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, MPI_Request *, int tag_ub, SuperLUStat_t *, int *info)
Definition: pdgstrf2.c:143
int_t pdgstrs_init(int_t, int_t, int_t, int_t, int_t[], int_t[], gridinfo_t *grid, Glu_persist_t *, dSOLVEstruct_t *)
Definition: pdutil.c:650
int_t dISend_LDiagBlock(int_t k0, double *lblk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
int_t dgatherFactoredLU(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, dLUValSubBuf_t *LUvsb, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
double * dready_lsum
Definition: pdgstrs3d.c:150
int_t dbroadcastAncestor3d(dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t diBcastXk2Pck(int_t k, double *x, int nrhs, int **sendList, MPI_Request *send_req, dLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:6182
void dallocScalePermstruct_RC(dScalePermstruct_t *, int_t m, int_t n)
Definition: dssvx3dAux.c:586
int_t dtrs_X_gather3d(double *x, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1552
int_t pdgstrf(superlu_dist_options_t *, int, int, double anorm, dLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: pdgstrf.c:243
int_t dSchurComplementSetupGPU(int_t k, msgs_t *msgs, packLUInfo_t *, int_t *, int_t *, int_t *, gEtreeInfo_t *, factNodelists_t *, dscuBufs_t *, dLUValSubBuf_t *LUvsb, gridinfo_t *, dLUstruct_t *, HyP_t *)
int dAllocGlu_3d(int_t n, int_t nsupers, dLUstruct_t *)
Definition: dutil_dist.c:471
int_t dBcast_UPanel(int_t k, int_t k0, int_t *usub, double *uval, gridinfo_t *, int *msgcnt, int *ToSendD, SCT_t *, int)
int dequil_batch(superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, double **ReqPtr, double **CeqPtr, DiagScale_t *)
Equilibrate the systems using the LAPACK-style algorithm.
Definition: dequil_batch.c:43
int file_Printdouble5(FILE *, char *, int_t, double *)
Definition: dutil_dist.c:626
void dreadhb_dist(int, FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **)
Definition: dreadhb.c:107
int_t pdgsTrForwardSolve3d(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, dxT_struct *xT_s, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:7312
int_t dTrs2_GatherU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, double *uval, double *tempv)
Definition: pdgstrf2.c:757
void dCompCol_to_CompRow_dist(int_t m, int_t n, int_t nnz, double *a, int_t *colptr, int_t *rowind, double **at, int_t **rowptr, int_t **colind)
int_t dscatter3dLPanels(int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
void pdgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, Ublock_info_t *, SuperLUStat_t *)
Definition: pdgstrf2.c:850
int superlu_dgemv(const char *trans, const int m, const int n, const double alpha, const double *a, const int lda, const double *x, const int incx, const double beta, double *y, const int incy)
int_t pdgstrf3d(superlu_dist_options_t *, int m, int n, double anorm, dtrf3Dpartition_t *, SCT_t *, dLUstruct_t *, gridinfo3d_t *, SuperLUStat_t *, int *)
Definition: pdgstrf3d.c:121
void dCompRow_to_CompCol_dist(int_t, int_t, int_t, double *, int_t *, int_t *, double **, int_t **, int_t **)
void dscatter_u(int ib, int jb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *lsub, int_t *usub, double *tempv, int_t **Ufstnz_br_ptr, double **Unzval_br_ptr, gridinfo_t *grid)
int_t pdgstrs_init_device_lsum_x(superlu_dist_options_t *, int_t, int_t, int_t, gridinfo_t *, dLUstruct_t *, dSOLVEstruct_t *, int *)
Definition: pdutil.c:779
int_t dRecv_UDiagBlock(int_t k0, double *ublk_ptr, int_t size, int_t src, gridinfo_t *, SCT_t *, int)
void dLUstructInit(const int_t, dLUstruct_t *)
Allocate storage in LUstruct.
Definition: pdutil.c:408
void dCreate_Dense_Matrix_dist(SuperMatrix *, int_t, int_t, double *, int_t, Stype_t, Dtype_t, Mtype_t)
void dallocateA_dist(int_t, int_t, double **, int_t **, int_t **)
Definition: dmemory_dist.c:147
void dlsum_fmod_inv(double *, double *, double *, double *, int, int_t, int *fmod, int_t *, gridinfo_t *, dLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int)
Definition: pdgstrs_lsum.c:416
int_t dinitScuBufs(superlu_dist_options_t *, int_t ldt, int_t num_threads, int_t nsupers, dscuBufs_t *, dLUstruct_t *, gridinfo_t *)
int dcreate_matrix_rb(SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *)
int_t dreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, double *x, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, double *recvbuf, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1493
int_t dp3dScatter(int_t n, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
int_t dtrs_B_init3d(int_t nsupers, double *x, int nrhs, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: pdgstrs3d.c:32
int superlu_dtrsm(const char *sideRL, const char *uplo, const char *transa, const char *diag, const int m, const int n, const double alpha, const double *a, const int lda, double *b, const int ldb)
dtrf3Dpartition_t * dinitTrf3Dpartition(int_t nsupers, superlu_dist_options_t *options, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void pxgstrs_finalize(pxgstrs_comm_t *)
Definition: util.c:318
void dlsum_bmod(double *, double *, double *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, dLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: pdgstrs_lsum.c:246
dtrf3Dpartition_t * dinitTrf3DpartitionLUstructgrid0(int_t n, superlu_dist_options_t *options, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void pdgsrfs3d(superlu_dist_options_t *, int_t, SuperMatrix *, double, dLUstruct_t *, dScalePermstruct_t *, gridinfo3d_t *, dtrf3Dpartition_t *, double *, int_t, double *, int_t, int, dSOLVEstruct_t *, double *, SuperLUStat_t *, int *)
Definition: pdgsrfs.c:364
void pdinf_norm_error(int, int_t, int_t, double[], int_t, double[], int_t, MPI_Comm)
Check the inf-norm of the error vector.
Definition: pdutil.c:1274
int dcreate_batch_systems(handle_t *SparseMatrix_handles, int batchCount, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: dcreate_matrix3d.c:613
int dread_binary(FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **)
Definition: dbinary_io.c:4
int_t dlsumForestBsolve(int_t k, int_t treeId, double *lsum, double *x, dxT_struct *xT_s, int nrhs, dlsumBmod_buff_t *lbmod_buf, dLUstruct_t *LUstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat)
Definition: pdgstrs3d.c:4045
int_t pdReDistribute3d_X_to_B(int_t n, double *B, int_t m_loc, int_t ldb, int_t fst_row, int nrhs, double *x, int_t *ilsum, dScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, dSOLVEstruct_t *SOLVEstruct)
Definition: pdgstrs3d.c:6408
double * doubleMalloc_dist(int_t)
Definition: dmemory_dist.c:155
int pdCompRow_loc_to_CompCol_global(int_t, SuperMatrix *, gridinfo_t *, SuperMatrix *)
Gather A from the distributed compressed row format to global A in compressed column format.
Definition: pdutil.c:35
int dSolveInit(superlu_dist_options_t *, SuperMatrix *, int_t[], int_t[], int_t, dLUstruct_t *, gridinfo_t *, dSOLVEstruct_t *)
Initialize the data structure for the solution phase.
Definition: pdutil.c:1108
void dlsum_bmod_GG(double *lsum, double *x, double *xk, int nrhs, dlsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:5687
int_t dIBcast_LPanel(int_t k, int_t k0, int_t *lsub, double *lusup, gridinfo_t *, int *msgcnt, MPI_Request *, int **ToSendR, int_t *xsup, int)
void pdgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, dLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *)
void dPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *)
Print the blocks in the factored matrix U.
Definition: dutil_dist.c:1369
void dInit_HyP(superlu_dist_options_t *, HyP_t *HyP, dLocalLU_t *Llu, int_t mcb, int_t mrb)
int_t dleafForestBackSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, double *recvbuf, MPI_Request *send_req, int nrhs, dlsumBmod_buff_t *lbmod_buf, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:4279
int dreduceAllAncestors3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm, dLUValSubBuf_t *LUvsb, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t dzeroSetLU(int_t nnodes, int_t *nodeList, dLUstruct_t *, gridinfo3d_t *)
int_t dWaitU(int_t k, int *msgcnt, MPI_Request *, MPI_Request *, gridinfo_t *, dLUstruct_t *, SCT_t *)
int dcreate_matrix_postfix(SuperMatrix *, int, double **, int *, double **, int *, FILE *, char *, gridinfo_t *)
Definition: dcreate_matrix.c:76
int_t dUPanelTrSolve(int_t k, double *BlockLFactor, double *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, SCT_t *)
int superlu_dgemm(const char *transa, const char *transb, int m, int n, int k, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc)
int_t dlsumReducePrK(int_t k, double *x, double *lsum, double *recvbuf, int nrhs, dLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:4148
int superlu_dscal(const int n, const double alpha, double *x, const int incx)
int dDeAllocGlu_3d(dLUstruct_t *)
Definition: dutil_dist.c:481
double dMaxAbsLij(int iam, int n, Glu_persist_t *, dLUstruct_t *, gridinfo_t *)
Find max(abs(L(i,j)))
Definition: dutil_dist.c:641
void dClone_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
float ddistribute(superlu_dist_options_t *, int_t, SuperMatrix *, Glu_freeable_t *, dLUstruct_t *, gridinfo_t *)
Definition: ddistribute.c:67
double dlangs_dist(char *, SuperMatrix *)
Definition: dlangs_dist.c:72
int dLluBufFreeArr(int_t numLA, dLUValSubBuf_t **LUvsbs)
void dGatherNRformat_loc3d_allgrid(fact_t Fact, NRformat_loc *A, double *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
void pdgsmv_finalize(pdgsmv_comm_t *)
Definition: pdgsmv.c:371
void dScalePermstructInit(const int_t, const int_t, dScalePermstruct_t *)
Allocate storage in ScalePermstruct.
Definition: dutil_dist.c:437
void dlaqgs_dist(SuperMatrix *, double *, double *, double, double, double, char *)
Definition: dlaqgs_dist.c:93
void pdgstrs(superlu_dist_options_t *, int_t, dLUstruct_t *, dScalePermstruct_t *, gridinfo_t *, double *, int_t, int_t, int_t, int, dSOLVEstruct_t *, SuperLUStat_t *, int *)
Definition: pdgstrs.c:1039
int d_c2cpp_GetHWPM(SuperMatrix *, gridinfo_t *, dScalePermstruct_t *)
Definition: d_c2cpp_GetHWPM.cpp:55
void dnewTrfPartitionInit(int_t nsupers, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: d3DPartition.c:5
int dgemm_(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *)
void dger_(const int *, const int *, const double *, const double *, const int *, const double *, const int *, double *, const int *)
int superlu_dtrsv(char *uplo, char *trans, char *diag, int n, double *a, int lda, double *x, int incx)
void dCopy_Dense_Matrix_dist(int_t, int_t, double *, int_t, double *, int_t)
int dcreate_matrix_dat(SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *)
int_t dBcast_LPanel(int_t k, int_t k0, int_t *lsub, double *lusup, gridinfo_t *, int *msgcnt, int **ToSendR, int_t *xsup, SCT_t *, int)
void dDestroy_trf3Dpartition(dtrf3Dpartition_t *trf3Dpartition)
int_t dfsolveReduceLsum3d(int_t treeId, int_t sender, int_t receiver, double *lsum, double *recvbuf, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1647
void dreadtriple_dist(FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **)
Definition: dreadtriple.c:35
int_t pdReDistribute3d_B_to_X(double *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, double *x, dScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, dSOLVEstruct_t *SOLVEstruct)
Definition: pdgstrs3d.c:6269
void duser_free_dist(int_t, int_t)
Definition: dmemory_dist.c:49
void dZero_CompRowLoc_Matrix_dist(SuperMatrix *)
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,..,n.
Definition: dutil_dist.c:378
int_t dzRecvUPanel(int_t k, int_t sender, double alpha, double beta, double *Uval_buf, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t dinit3DLUstruct(int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *)
Definition: treeFactorization.c:226
double * dgetBigV(int_t, int_t)
void dFillRHS_dist(char *, int_t, double *, int_t, SuperMatrix *, double *, int_t)
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's.
Definition: dutil_dist.c:570
double * dready_x
Definition: pdgstrs3d.c:150
void dreadtriple_noheader(FILE *, int_t *, int_t *, int_t *, double **, int_t **, int_t **)
Definition: dreadtriple_noheader.c:35
void dComputeLevelsets(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *, int_t *)
Definition: dutil_dist.c:913
int_t dsparseTreeFactor_ASYNC(sForest_t *sforest, commRequests_t **comReqss, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, dLUValSubBuf_t **LUvsbs, ddiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, HyP_t *HyP, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
double * doubleCalloc_dist(int_t)
Definition: dmemory_dist.c:162
void Printdouble5(char *, int_t, double *)
Definition: dutil_dist.c:614
int_t dp2pSolvedX3d(int_t treeId, int_t sender, int_t receiver, double *x, int nrhs, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1597
int dinitDiagFactBufs(int ldt, ddiagFactBufs_t *dFBuf)
int_t dp3dCollect(int_t layer, int_t n, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void pdconvertUROWDATA2skyline(superlu_dist_options_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, int n)
Definition: pdgssvx.c:2304
void dPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *)
Print the blocks in the factored matrix L.
Definition: dutil_dist.c:730
int_t dbCastXk2Pck(int_t k, dxT_struct *xT_s, int nrhs, dLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:4118
dLUValSubBuf_t ** dLluBufInitArr(int_t numLA, dLUstruct_t *LUstruct)
int_t pdgsTrBackSolve3d(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, dxT_struct *xT_s, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:7564
void dbcastPermutedSparseA(SuperMatrix *A, dScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: d3DPartition.c:187
int_t pdgsTrForwardSolve3d_newsolve(superlu_dist_options_t *options, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x3d, double *lsum3d, double *recvbuf, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:7458
void dlsum_bmod_GG_newsolve(dtrf3Dpartition_t *trf3Dpartition, double *lsum, double *x, double *xk, int nrhs, dlsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, dLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:5872
int sp_dgemm_dist(char *, int, double, SuperMatrix *, double *, int, double, double *, int)
Definition: dsp_blas3_dist.c:126
void pdlaqgs(SuperMatrix *, double *, double *, double, double, double, char *)
Definition: pdlaqgs.c:85
int_t dblock_gemm_scatterBottomRight(int_t lb, int_t j, double *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, dLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int_t dleafForestForwardSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, dLUstruct_t *LUstruct, dScalePermstruct_t *ScalePermstruct, dtrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, double *x, double *lsum, double *recvbuf, double *rtemp, MPI_Request *send_req, int nrhs, dSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pdgstrs3d.c:1990
void pdconvertU(superlu_dist_options_t *, gridinfo_t *, dLUstruct_t *, SuperLUStat_t *, int)
int_t dIrecv_UPanel(int_t k, int_t k0, int_t *Usub_buf, double *, dLocalLU_t *, gridinfo_t *, MPI_Request *, int)
void dCopy_CompCol_Matrix_dist(SuperMatrix *, SuperMatrix *)
int dcreate_block_diag_3d(SuperMatrix *A, int batchCount, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: dcreate_matrix3d.c:347
void dfill_dist(double *, int_t, double)
Fills a double precision array with a given value.
Definition: dutil_dist.c:583
void pdgssvx(superlu_dist_options_t *, SuperMatrix *, dScalePermstruct_t *, double *, int, int, gridinfo_t *, dLUstruct_t *, dSOLVEstruct_t *, double *, SuperLUStat_t *, int *)
Definitions which are precision-neutral.
SupernodeToGridMap_t
Definition: superlu_defs.h:1291
trtype_t
Definition: superlu_defs.h:1317
int64_t handle_t
Definition: superlu_defs.h:348
#define NBUFFERS
Definition: superlu_defs.h:205
int64_t int_t
Definition: superlu_defs.h:119
DiagScale_t
Definition: superlu_enum_consts.h:35
fact_t
Definition: superlu_enum_consts.h:30
Mtype_t
Definition: supermatrix.h:42
Dtype_t
Definition: supermatrix.h:35
Stype_t
Definition: supermatrix.h:22
int j
Definition: sutil_dist.c:287