SuperLU Distributed 9.0.0
gpu3d
superlu_zdefs.h
Go to the documentation of this file.
1
27#ifndef __SUPERLU_ZDEFS /* allow multiple inclusions */
28#define __SUPERLU_ZDEFS
29
30/*
31 * File name: superlu_zdefs.h
32 * Purpose: Distributed SuperLU data types and function prototypes
33 * History:
34 */
35
36#include "superlu_defs.h"
37#include "dcomplex.h"
38
39/*
40 *-- The structure used to store matrix A of the linear system and
41 * several vectors describing the transformations done to matrix A.
42 *
43 * A (SuperMatrix*)
44 * Matrix A in A*X=B, of dimension (A->nrow, A->ncol).
45 * The number of linear equations is A->nrow. The type of A can be:
46 * Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
47 *
48 * DiagScale (DiagScale_t)
49 * Specifies the form of equilibration that was done.
50 * = NOEQUIL: No equilibration.
51 * = ROW: Row equilibration, i.e., A was premultiplied by diag(R).
52 * = COL: Column equilibration, i.e., A was postmultiplied by diag(C).
53 * = BOTH: Both row and column equilibration, i.e., A was replaced
54 * by diag(R)*A*diag(C).
55 *
56 * R double*, dimension (A->nrow)
57 * The row scale factors for A.
58 * If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R).
59 * If DiagScale = NOEQUIL or COL, R is not defined.
60 *
61 * C double*, dimension (A->ncol)
62 * The column scale factors for A.
63 * If DiagScale = COL or BOTH, A is multiplied on the right by diag(C).
64 * If DiagScale = NOEQUIL or ROW, C is not defined.
65 *
66 * perm_r (int*) dimension (A->nrow)
67 * Row permutation vector which defines the permutation matrix Pr,
68 * perm_r[i] = j means row i of A is in position j in Pr*A.
69 *
70 * perm_c (int*) dimension (A->ncol)
71 * Column permutation vector, which defines the
72 * permutation matrix Pc; perm_c[i] = j means column i of A is
73 * in position j in A*Pc.
74 *
75 */
76typedef struct {
78 double *R;
79 double *C;
83
84#if 0 // Sherry: move to superlu_defs.h
85/*-- Auxiliary data type used in PxGSTRS/PxGSTRS1. */
86typedef struct {
87 int_t lbnum; /* Row block number (local). */
88 int_t indpos; /* Starting position in Uindex[]. */
90#endif
91
92/*
93 * On each processor, the blocks in L are stored in compressed block
94 * column format, the blocks in U are stored in compressed block row format.
95 */
96#define MAX_LOOKAHEADS 50
97typedef struct {
98 int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc);
99 free'd in ztrs_compute_communication_structure routinies */
100 int_t *Lrowind_bc_dat; /* size sum of sizes of Lrowind_bc_ptr[lk]) */
101 long int *Lrowind_bc_offset; /* size ceil(NSUPERS/Pc) */
103
104 doublecomplex **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc);
105 free'd in ztrs_compute_communication_structure routinies */
106 doublecomplex *Lnzval_bc_dat; /* size sum of sizes of Lnzval_bc_ptr[lk]) */
107 long int *Lnzval_bc_offset; /* size ceil(NSUPERS/Pc) */
109
110 doublecomplex **Linv_bc_ptr; /* size ceil(NSUPERS/Pc);
111 free'd in ztrs_compute_communication_structure routinies */
112 doublecomplex *Linv_bc_dat; /* size sum of sizes of Linv_bc_ptr[lk]) */
113 long int *Linv_bc_offset; /* size ceil(NSUPERS/Pc) */
114 long int Linv_bc_cnt;
115
116 int_t **Lindval_loc_bc_ptr; /* size ceil(NSUPERS/Pc);
117 pointers to locations in Lrowind_bc_ptr and Lnzval_bc_ptr;
118 free'd in ztrs_compute_communication_structure routinies */
119
120 int_t *Lindval_loc_bc_dat; /* size: sum of sizes of Lindval_loc_bc_ptr[lk]) */
121 long int *Lindval_loc_bc_offset; /* size ceil(NSUPERS/Pc) */
123
124 /* for new U format -> */
125 int_t **Ucolind_bc_ptr; /* size ceil(NSUPERS/Pc) */
126 int_t *Ucolind_bc_dat; /* size: sum of sizes of Ucolind_bc_ptr[lk]) */
127 int64_t *Ucolind_bc_offset; /* size ceil(NSUPERS/Pc) */
129
130 doublecomplex **Unzval_bc_ptr; /* size ceil(NSUPERS/Pc) */
131 doublecomplex *Unzval_bc_dat; /* size: sum of sizes of Unzval_bc_ptr[lk]) */
132 int64_t *Unzval_bc_offset; /* size ceil(NSUPERS/Pc) */
134
135 int_t **Uindval_loc_bc_ptr; /* size ceil(NSUPERS/Pc) pointers to locations in Ucolind_bc_ptr and Unzval_bc_ptr */
136 int_t *Uindval_loc_bc_dat; /* size: sum of sizes of Uindval_loc_bc_ptr[lk]) */
137 int64_t *Uindval_loc_bc_offset; /* size ceil(NSUPERS/Pc) */
139
140 int_t **Uind_br_ptr; /* size ceil(NSUPERS/Pr) pointers to locations in Ucolind_bc_ptr for each block row */
141 int_t *Uind_br_dat; /* size: sum of sizes of Uind_br_ptr[lk]) */
142 int64_t *Uind_br_offset; /* size ceil(NSUPERS/Pr) */
143 int64_t Uind_br_cnt;
144
145 int_t **Ucolind_br_ptr; /* size ceil(NSUPERS/Pr) */
146 int_t *Ucolind_br_dat; /* size: sum of sizes of Ucolind_br_ptr[lk]) */
147 int64_t *Ucolind_br_offset; /* size ceil(NSUPERS/Pr) */
149
150 doublecomplex **Unzval_br_new_ptr; /* size ceil(NSUPERS/Pr) */
151 doublecomplex *Unzval_br_new_dat; /* size: sum of sizes of Unzval_br_ptr[lk]) */
152 int64_t *Unzval_br_new_offset; /* size ceil(NSUPERS/Pr) */
154
155 /* end for new U format <- */
156
157 int_t *Unnz; /* number of nonzeros per block column in U*/
158 int_t **Lrowind_bc_2_lsum; /* size ceil(NSUPERS/Pc) map indices of Lrowind_bc_ptr to indices of lsum */
159 doublecomplex **Uinv_bc_ptr; /* size ceil(NSUPERS/Pc) */
160 doublecomplex *Uinv_bc_dat; /* size sum of sizes of Linv_bc_ptr[lk]) */
161 long int *Uinv_bc_offset; /* size ceil(NSUPERS/Pc) */
162 long int Uinv_bc_cnt;
163
164 int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */
165 int_t *Ufstnz_br_dat; /* size sum of sizes of Ufstnz_br_ptr[lk]) */
166 long int *Ufstnz_br_offset; /* size ceil(NSUPERS/Pr) */
168
169 doublecomplex **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */
170 doublecomplex *Unzval_br_dat; /* size sum of sizes of Unzval_br_ptr[lk]) */
171 long int *Unzval_br_offset; /* size ceil(NSUPERS/Pr) */
173
174 /*-- Data structures used for broadcast and reduction trees. --*/
175 C_Tree *LBtree_ptr; /* size ceil(NSUPERS/Pc) */
176 C_Tree *LRtree_ptr; /* size ceil(NSUPERS/Pr) */
177 C_Tree *UBtree_ptr; /* size ceil(NSUPERS/Pc) */
178 C_Tree *URtree_ptr; /* size ceil(NSUPERS/Pr) */
179#if 0
180 int_t *Lsub_buf; /* Buffer for the remote subscripts of L */
181 doublecomplex *Lval_buf; /* Buffer for the remote nonzeros of L */
182 int_t *Usub_buf; /* Buffer for the remote subscripts of U */
183 doublecomplex *Uval_buf; /* Buffer for the remote nonzeros of U */
184#endif
185 int_t *Lsub_buf_2[MAX_LOOKAHEADS]; /* Buffers for the remote subscripts of L*/
186 doublecomplex *Lval_buf_2[MAX_LOOKAHEADS]; /* Buffers for the remote nonzeros of L */
187 int_t *Usub_buf_2[MAX_LOOKAHEADS]; /* Buffer for the remote subscripts of U */
188 doublecomplex *Uval_buf_2[MAX_LOOKAHEADS]; /* Buffer for the remote nonzeros of U */
189 doublecomplex *ujrow; /* used in panel factorization. */
190 int_t bufmax[NBUFFERS]; /* Maximum buffer size across all MPI ranks:
191 * 0 : maximum size of Lsub_buf[]
192 * 1 : maximum size of Lval_buf[]
193 * 2 : maximum size of Usub_buf[]
194 * 3 : maximum size of Uval_buf[]
195 * 4 : maximum size of tempv[LDA]
196 */
197
198 /*-- Record communication schedule for factorization. --*/
199 int *ToRecv; /* Recv from no one (0), left (1), and up (2).*/
200 int *ToSendD; /* Whether need to send down block row. */
201 int **ToSendR; /* List of processes to send right block col. */
202
203 /*-- Record communication schedule for forward/back solves. --*/
204 /* 1/15/22 Sherry: changed int_t to int type */
205 int *fmod; /* Modification count for L-solve */
206 int **fsendx_plist; /* Column process list to send down Xk */
207 int *frecv; /* Modifications to be recv'd in proc row */
208 int nfrecvx; /* Number of Xk I will receive in L-solve */
209 int nfsendx; /* Number of Xk I will send in L-solve */
210 int *bmod; /* Modification count for U-solve */
211 int **bsendx_plist; /* Column process list to send down Xk */
212 int *brecv; /* Modifications to be recv'd in proc row */
213 int nbrecvx; /* Number of Xk I will receive in U-solve */
214 int nbsendx; /* Number of Xk I will send in U-solve */
215 int *mod_bit; /* Flag contribution from each row blocks */
216 int nleaf;
217 int nroot;
218 /*-- Auxiliary arrays used for forward/back solves. --*/
219 int_t *ilsum; /* Starting position of each supernode in lsum
220 (local) */
221 int_t ldalsum; /* LDA of lsum (local) */
222 int_t SolveMsgSent; /* Number of actual messages sent in LU-solve */
223 int_t SolveMsgVol; /* Volume of messages sent in the solve phase */
224 int *bcols_masked; /* Local block column IDs in my 2D grid */
225
226 /*********************/
227 /* The following variables are used in the hybrid solver */
228
229 /*-- Counts to be used in U^{-T} triangular solve. -- */
233 int_t ut_ldalsum; /* LDA of lsum (local) */
234 int_t *ut_ilsum; /* ilsum in column-wise */
235 int_t *utmod; /* Modification count for Ut-solve. */
236 int_t **ut_sendx_plist; /* Row process list to send down Xk */
237 int_t *utrecv; /* Modifications to be recev'd in proc column. */
238 int_t n_utsendx; /* Number of Xk I will receive */
239 int_t n_utrecvx; /* Number of Xk I will send */
243 Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
245 long int *Ucb_indoffset;
246 long int Ucb_indcnt;
247
248 int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */
250 long int *Ucb_valoffset;
251 long int Ucb_valcnt;
252
253 /* some additional counters for L solve */
256 int_t inv; /* whether the diagonal block is inverted*/
257 int nbcol_masked; /*number of local block columns in my 2D grid*/
258
259#ifdef GPU_ACC
260 /* The following variables are used in GPU trisolve */
261
272 int_t *d_Ucolind_br_dat; /* size: sum of sizes of Ucolind_br_ptr[lk]) */
273 int64_t *d_Ucolind_br_offset; /* size ceil(NSUPERS/Pr) */
274 doublecomplex *d_Unzval_br_new_dat; /* size: sum of sizes of Unzval_br_ptr[lk]) */
275 int64_t *d_Unzval_br_new_offset; /* size ceil(NSUPERS/Pr) */
276
285 int *d_bcols_masked; /* Local block column IDs in my 2D grid */
286
287 // long int *d_Lindval_loc_bc_offset ;
288 // int_t *d_Urbs;
289 // int_t *d_Ufstnz_br_dat;
290 // long int *d_Ufstnz_br_offset;
291 // doublecomplex *d_Unzval_br_dat;
292 // long int *d_Unzval_br_offset;
293 // int_t *d_Ucb_valdat;
294 // long int *d_Ucb_valoffset;
295 // Ucb_indptr_t *d_Ucb_inddat;
296 // long int *d_Ucb_indoffset;
297
305#endif
306
307} zLocalLU_t;
308
309typedef struct
310{
316
317typedef struct
318{
331 int maxLvl; // YL: store this to avoid the use of grid3d
332
333 /* Sherry added the following 3 for variable size batch. 2/17/23 */
334 int mxLeafNode; /* number of leaf nodes. */
335 int *diagDims; /* dimensions of the diagonal blocks at any level of the tree */
336 int *gemmCsizes; /* sizes of the C matrices at any level of the tree. */
338
339
340typedef struct {
345 char dt;
347
348
349/*-- Data structure for communication during matrix-vector multiplication. */
350typedef struct {
352 int_t *ind_tosend; /* X indeices to be sent to other processes */
353 int_t *ind_torecv; /* X indeices to be received from other processes */
354 int_t *ptr_ind_tosend;/* Printers to ind_tosend[] (Size procs)
355 (also point to val_torecv) */
356 int_t *ptr_ind_torecv;/* Printers to ind_torecv[] (Size procs)
357 (also point to val_tosend) */
358 int *SendCounts; /* Numbers of X indices to be sent
359 (also numbers of X values to be received) */
360 int *RecvCounts; /* Numbers of X indices to be received
361 (also numbers of X values to be sent) */
362 void *val_tosend; /* X values to be sent to other processes */
363 void *val_torecv; /* X values to be received from other processes */
364 int_t TotalIndSend; /* Total number of indices to be sent
365 (also total number of values to be received) */
366 int_t TotalValSend; /* Total number of values to be sent.
367 (also total number of indices to be received) */
369
370/*-- Data structure holding the information for the solution phase --*/
371typedef struct {
374 int_t num_diag_procs, *diag_procs, *diag_len;
375 pzgsmv_comm_t *gsmv_comm; /* communication metadata for SpMV,
376 required by IterRefine. */
377 pxgstrs_comm_t *gstrs_comm; /* communication metadata for SpTRSV. */
378 int_t *A_colind_gsmv; /* After pzgsmv_init(), the global column
379 indices of A are translated into the relative
380 positions in the gathered x-vector.
381 This is re-used in repeated calls to pzgsmv() */
382 int_t *xrow_to_proc; /* used by PDSLin */
383 NRformat_loc3d* A3d; /* Point to 3D {A, B} gathered on 2D layer 0.
384 This needs to be peresistent between
385 3D factorization and solve. */
386 #ifdef GPU_ACC
387 doublecomplex *d_lsum, *d_lsum_save; /* used for device lsum*/
388 doublecomplex *d_x; /* used for device solution vector*/
389 int *d_fmod_save, *d_fmod; /* used for device fmod vector*/
390 int *d_bmod_save, *d_bmod; /* used for device bmod vector*/
391 #endif
393
394
395
396/*==== For 3D code ====*/
397
398// new structures for pdgstrf_4_8
399
400#if 0 // Sherry: moved to superlu_defs.h
401typedef struct
402{
403 int_t nub;
404 int_t klst;
405 int_t ldu;
406 int_t* usub;
407 doublecomplex* uval;
409
410typedef struct
411{
412 int_t *lsub;
414 int_t luptr0;
415 int_t nlb; //number of l blocks
416 int_t nsupr;
418
419/* HyP_t is the data structure to assist HALO offload of Schur-complement. */
420typedef struct
421{
422 Remain_info_t *lookAhead_info, *Remain_info;
423 Ublock_info_t *Ublock_info, *Ublock_info_Phi;
424
425 int_t first_l_block_acc , first_u_block_acc;
426 int_t last_offload ;
427 int_t *Lblock_dirty_bit, * Ublock_dirty_bit;
428 doublecomplex *lookAhead_L_buff, *Remain_L_buff;
429 int_t lookAheadBlk; /* number of blocks in look-ahead window */
430 int_t RemainBlk ; /* number of blocks outside look-ahead window */
431 int_t num_look_aheads, nsupers;
432 int_t ldu, ldu_Phi;
433 int_t num_u_blks, num_u_blks_Phi;
434
435 int_t jj_cpu;
436 doublecomplex *bigU_Phi;
437 doublecomplex *bigU_host;
438 int_t Lnbrow;
439 int_t Rnbrow;
440
441 int_t buffer_size;
442 int_t bigu_size;
443 int offloadCondition;
444 int superlu_acc_offload;
445 int nGPUStreams;
446} HyP_t;
447
448#endif // Above are moved to superlu_defs.h
449
450
452 int_t knsupc,
453 HyP_t* HyP,
454 SCT_t* SCT,
455 SuperLUStat_t *stat
456 );
457
458
459
460typedef struct
461{
464} zscuBufs_t;
465
466typedef struct
467{
471
472
473typedef struct zxT_struct
474{
479
480typedef struct zlsumBmod_buff_t
481{
482 doublecomplex * tX; // buffer for reordered X
483 doublecomplex * tU; // buffer for packedU
486
487/*=====================*/
488
489/***********************************************************************
490 * Function prototypes
491 ***********************************************************************/
492
493#ifdef __cplusplus
494extern "C" {
495#endif
496
497
498/* Supernodal LU factor related */
499extern void
502extern void
506extern void
508 doublecomplex **, int_t **, int_t **);
509extern void
511 doublecomplex *a, int_t *colptr, int_t *rowind,
512 doublecomplex **at, int_t **rowptr, int_t **colind);
513extern int
515 SuperMatrix *);
516extern void
518extern void
521extern void
523 int_t *, int_t *, int_t *, int_t *, int_t *,
525extern void
528
529extern void zallocateA_dist (int_t, int_t, doublecomplex **, int_t **, int_t **);
530extern void zGenXtrue_dist (int_t, int_t, doublecomplex *, int_t);
531extern void zFillRHS_dist (char *, int_t, doublecomplex *, int_t,
533extern int zcreate_matrix(SuperMatrix *, int, doublecomplex **, int *,
534 doublecomplex **, int *, FILE *, gridinfo_t *);
535extern int zcreate_matrix_rb(SuperMatrix *, int, doublecomplex **, int *,
536 doublecomplex **, int *, FILE *, gridinfo_t *);
537extern int zcreate_matrix_dat(SuperMatrix *, int, doublecomplex **, int *,
538 doublecomplex **, int *, FILE *, gridinfo_t *);
539extern int zcreate_matrix_postfix(SuperMatrix *, int, doublecomplex **, int *,
540 doublecomplex **, int *, FILE *, char *, gridinfo_t *);
541
542extern void zScalePermstructInit(const int_t, const int_t,
545
546/* Driver related */
547extern void zgsequ_dist (SuperMatrix *, double *, double *, double *,
548 double *, double *, int *);
549extern double zlangs_dist (char *, SuperMatrix *);
550extern void zlaqgs_dist (SuperMatrix *, double *, double *, double,
551 double, double, char *);
552extern void pzgsequ (SuperMatrix *, double *, double *, double *,
553 double *, double *, int *, gridinfo_t *);
554extern double pzlangs (char *, SuperMatrix *, gridinfo_t *);
555extern void pzlaqgs (SuperMatrix *, double *, double *, double,
556 double, double, char *);
557extern int pzPermute_Dense_Matrix(int_t, int_t, int_t [], int_t[],
558 doublecomplex [], int, doublecomplex [], int, int,
559 gridinfo_t *);
560
561extern int sp_ztrsv_dist (char *, char *, char *, SuperMatrix *,
562 SuperMatrix *, doublecomplex *, int *);
563extern int sp_zgemv_dist (char *, doublecomplex, SuperMatrix *, doublecomplex *,
564 int, doublecomplex, doublecomplex *, int);
565extern int sp_zgemm_dist (char *, int, doublecomplex, SuperMatrix *,
567
573 int, int, gridinfo_t *, zLUstruct_t *, double *,
574 SuperLUStat_t *, int *);
579 zScalePermstruct_t *ScalePermstruct,
580 Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct,
581 gridinfo_t *grid, int* supernodeMask);
582
584 zScalePermstruct_t *ScalePermstruct,
585 Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct,
586 gridinfo_t *grid, int* supernodeMask);
589 int, int, gridinfo_t *, zLUstruct_t *,
590 zSOLVEstruct_t *, double *, SuperLUStat_t *, int *);
597 int_t [], int_t [], gridinfo_t *grid,
600 zLUstruct_t *, zSOLVEstruct_t *, int*);
602extern void pxgstrs_finalize(pxgstrs_comm_t *);
603extern int zldperm_dist(int, int, int_t, int_t [], int_t [],
604 doublecomplex [], int_t *, double [], double []);
605extern int zstatic_schedule(superlu_dist_options_t *, int, int,
607 int_t *, int_t *, int *);
608extern void zLUstructInit(const int_t, zLUstruct_t *);
609extern void zLUstructFree(zLUstruct_t *);
610extern void zDestroy_LU(int_t, gridinfo_t *, zLUstruct_t *);
611extern void zDestroy_Tree(int_t, gridinfo_t *, zLUstruct_t *);
612extern void zscatter_l (int ib, int ljb, int nsupc, int_t iukp, int_t* xsup,
613 int klst, int nbrow, int_t lptr, int temp_nbrow,
614 int_t* usub, int_t* lsub, doublecomplex *tempv,
615 int* indirect_thread, int* indirect2,
616 int_t ** Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr,
617 gridinfo_t * grid);
618extern void zscatter_u (int ib, int jb, int nsupc, int_t iukp, int_t * xsup,
619 int klst, int nbrow, int_t lptr, int temp_nbrow,
620 int_t* lsub, int_t* usub, doublecomplex* tempv,
621 int_t ** Ufstnz_br_ptr, doublecomplex **Unzval_br_ptr,
622 gridinfo_t * grid);
623extern int_t pzgstrf(superlu_dist_options_t *, int, int, double anorm,
625
626/* #define GPU_PROF
627#define IPM_PROF */
628
629/* Solve related */
632 doublecomplex *, int_t, int, SuperLUStat_t *, int *);
636 SuperLUStat_t *, int *);
637extern void pzgstrf2_trsm(superlu_dist_options_t * options, int_t k0, int_t k,
638 double thresh, Glu_persist_t *, gridinfo_t *,
639 zLocalLU_t *, MPI_Request *, int tag_ub,
640 SuperLUStat_t *, int *info);
641extern void pzgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *,
643extern int_t pzReDistribute_B_to_X(doublecomplex *B, int_t m_loc, int nrhs, int_t ldb,
644 int_t fst_row, int_t *ilsum, doublecomplex *x,
648 int, int, int_t , int *fmod, int_t, int_t, int_t,
650 MPI_Request [], SuperLUStat_t *);
652 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
653 int_t **, int_t *, gridinfo_t *, zLocalLU_t *,
654 MPI_Request [], SuperLUStat_t *);
655
657 int, int_t , int *fmod,
659 SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int);
661 int, int, int_t , int *fmod, int_t,
663 SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int);
665 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
666 int_t **, int_t *, gridinfo_t *, zLocalLU_t *,
667 SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int);
669 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
670 int_t **, int_t *, gridinfo_t *, zLocalLU_t *,
671 SuperLUStat_t **, int_t, int_t, int, int);
672
673extern void zComputeLevelsets(int , int_t , gridinfo_t *,
675
676#ifdef GPU_ACC
678
679extern void zlsum_fmod_inv_gpu_wrap(int, int, int, int, doublecomplex *, doublecomplex *, int, int, int_t , int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, doublecomplex *, int64_t *, doublecomplex *, int64_t *, int_t *, int64_t *, int_t *, int *, gridinfo_t *,
680int_t , uint64_t* ,uint64_t* ,doublecomplex* ,doublecomplex* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int* ,int);
681
682extern void zlsum_bmod_inv_gpu_wrap(superlu_dist_options_t *, int, int, int, int, doublecomplex *, doublecomplex *,int,int, int_t , int *, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *,int_t *, int64_t *, int_t *, int64_t *, doublecomplex *, int64_t *, doublecomplex *, int64_t *, doublecomplex *, int64_t *, int_t *, int64_t *, int_t *,gridinfo_t *,
683 int_t, uint64_t*, uint64_t*, doublecomplex*, doublecomplex*,
684 int*, int*, int*, int*,
685 int*, int*, int*, int*, int*,
686 int*, int*, int*, int*, int*, int*,
687 int*, int*, int*, int); //int*); //int*, doublecomplex*);
688
689#endif
690
692 SuperMatrix *, double, zLUstruct_t *,
695 zSOLVEstruct_t *, double *, SuperLUStat_t *, int *);
696
698 SuperMatrix *, double, zLUstruct_t *,
701 zSOLVEstruct_t *, double *, SuperLUStat_t *, int *);
702
703
705 SuperMatrix *, double, zLUstruct_t *,
707 int, double *, SuperLUStat_t *, int *);
709 gridinfo_t *, int_t *, int_t *[],
710 doublecomplex *[], int_t *[], int_t []);
711extern int pzgsmv_AXglobal(int_t, int_t [], doublecomplex [], int_t [],
713extern int pzgsmv_AXglobal_abs(int_t, int_t [], doublecomplex [], int_t [],
714 doublecomplex [], double []);
715extern void pzgsmv_init(SuperMatrix *, int_t *, gridinfo_t *,
716 pzgsmv_comm_t *);
717extern void pzgsmv(int_t, SuperMatrix *, gridinfo_t *, pzgsmv_comm_t *,
718 doublecomplex x[], doublecomplex ax[]);
719extern void pzgsmv_finalize(pzgsmv_comm_t *);
720
721extern int_t zinitLsumBmod_buff(int_t ns, int nrhs, zlsumBmod_buff_t* lbmod_buf);
722extern int_t zleafForestBackSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, zLUstruct_t * LUstruct,
723 zScalePermstruct_t * ScalePermstruct,
724 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
725 doublecomplex * x, doublecomplex * lsum, doublecomplex * recvbuf,
726 MPI_Request * send_req,
727 int nrhs, zlsumBmod_buff_t* lbmod_buf,
728 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
729
730extern int_t znonLeafForestBackSolve3d( int_t treeId, zLUstruct_t * LUstruct,
731 zScalePermstruct_t * ScalePermstruct,
732 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
733 doublecomplex * x, doublecomplex * lsum, zxT_struct *xT_s,doublecomplex * recvbuf,
734 MPI_Request * send_req,
735 int nrhs, zlsumBmod_buff_t* lbmod_buf,
736 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
737
738extern int_t zlasum_bmod_Tree(int_t pTree, int_t cTree, doublecomplex *lsum, doublecomplex *x,
739 zxT_struct *xT_s,
740 int nrhs, zlsumBmod_buff_t* lbmod_buf,
741 zLUstruct_t * LUstruct,
742 ztrf3Dpartition_t* trf3Dpartition,
743 gridinfo3d_t* grid3d, SuperLUStat_t * stat);
744extern int_t zlsumForestBsolve(int_t k, int_t treeId,
745 doublecomplex *lsum, doublecomplex *x, zxT_struct *xT_s,int nrhs, zlsumBmod_buff_t* lbmod_buf,
746 zLUstruct_t * LUstruct,
747 ztrf3Dpartition_t* trf3Dpartition,
748 gridinfo3d_t* grid3d, SuperLUStat_t * stat);
749
750extern int_t zbCastXk2Pck (int_t k, zxT_struct *xT_s, int nrhs,
751 zLUstruct_t * LUstruct, gridinfo_t * grid, xtrsTimer_t *xtrsTimer);
752
753extern int_t zlsumReducePrK (int_t k, doublecomplex*x, doublecomplex* lsum, doublecomplex* recvbuf, int nrhs,
754 zLUstruct_t * LUstruct, gridinfo_t * grid, xtrsTimer_t *xtrsTimer);
755
756extern int_t znonLeafForestForwardSolve3d( int_t treeId, zLUstruct_t * LUstruct,
757 zScalePermstruct_t * ScalePermstruct,
758 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
759 doublecomplex * x, doublecomplex * lsum,
760 zxT_struct *xT_s,
761 doublecomplex * recvbuf, doublecomplex* rtemp,
762 MPI_Request * send_req,
763 int nrhs,
764 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
765extern int_t zleafForestForwardSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, zLUstruct_t * LUstruct,
766 zScalePermstruct_t * ScalePermstruct,
767 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
768 doublecomplex * x, doublecomplex * lsum, doublecomplex * recvbuf, doublecomplex* rtemp,
769 MPI_Request * send_req,
770 int nrhs,
771 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
772
773
775 zScalePermstruct_t * ScalePermstruct,
776 int* supernodeMask, gridinfo_t *grid, SuperLUStat_t * stat);
777extern int_t zreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, doublecomplex* x, int nrhs,
778 ztrf3Dpartition_t* trf3Dpartition, zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, doublecomplex* recvbuf, xtrsTimer_t *xtrsTimer);
779
780extern void zlsum_fmod_leaf (
781 int_t treeId,
782 ztrf3Dpartition_t* trf3Dpartition,
783 doublecomplex *lsum, /* Sum of local modifications. */
784 doublecomplex *x, /* X array (local) */
785 doublecomplex *xk, /* X[k]. */
786 doublecomplex *rtemp, /* Result of full matrix-vector multiply. */
787 int nrhs, /* Number of right-hand sides. */
788 int knsupc, /* Size of supernode k. */
789 int_t k, /* The k-th component of X. */
790 int *fmod, /* Modification count for L-solve. */
791 int_t nlb, /* Number of L blocks. */
792 int_t lptr, /* Starting position in lsub[*]. */
793 int_t luptr, /* Starting position in lusup[*]. */
794 int_t *xsup,
795 gridinfo_t *grid,
796 zLocalLU_t *Llu,
797 MPI_Request send_req[], /* input/output */
798 SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer);
799
800extern void zlsum_fmod_leaf_newsolve (
801 ztrf3Dpartition_t* trf3Dpartition,
802 doublecomplex *lsum, /* Sum of local modifications. */
803 doublecomplex *x, /* X array (local) */
804 doublecomplex *xk, /* X[k]. */
805 doublecomplex *rtemp, /* Result of full matrix-vector multiply. */
806 int nrhs, /* Number of right-hand sides. */
807 int knsupc, /* Size of supernode k. */
808 int_t k, /* The k-th component of X. */
809 int *fmod, /* Modification count for L-solve. */
810 int_t nlb, /* Number of L blocks. */
811 int_t lptr, /* Starting position in lsub[*]. */
812 int_t luptr, /* Starting position in lusup[*]. */
813 int_t *xsup,
814 gridinfo_t *grid,
815 zLocalLU_t *Llu,
816 MPI_Request send_req[], /* input/output */
817 SuperLUStat_t *stat,xtrsTimer_t *xtrsTimer);
818
819
820extern void zlsum_bmod_GG
821(
822 doublecomplex *lsum, /* Sum of local modifications. */
823 doublecomplex *x, /* X array (local). */
824 doublecomplex *xk, /* X[k]. */
825 int nrhs, /* Number of right-hand sides. */
826 zlsumBmod_buff_t* lbmod_buf,
827 int_t k, /* The k-th component of X. */
828 int *bmod, /* Modification count for L-solve. */
829 int_t *Urbs, /* Number of row blocks in each block column of U.*/
830 Ucb_indptr_t **Ucb_indptr,/* Vertical linked list pointing to Uindex[].*/
831 int_t **Ucb_valptr, /* Vertical linked list pointing to Unzval[]. */
832 int_t *xsup,
833 gridinfo_t *grid,
834 zLocalLU_t *Llu,
835 MPI_Request send_req[], /* input/output */
836 SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer);
837
838extern void zlsum_bmod_GG_newsolve (
839 ztrf3Dpartition_t* trf3Dpartition,
840 doublecomplex *lsum, /* Sum of local modifications. */
841 doublecomplex *x, /* X array (local). */
842 doublecomplex *xk, /* X[k]. */
843 int nrhs, /* Number of right-hand sides. */
844 zlsumBmod_buff_t* lbmod_buf,
845 int_t k, /* The k-th component of X. */
846 int *bmod, /* Modification count for L-solve. */
847 int_t *Urbs, /* Number of row blocks in each block column of U.*/
848 Ucb_indptr_t **Ucb_indptr,/* Vertical linked list pointing to Uindex[].*/
849 int_t **Ucb_valptr, /* Vertical linked list pointing to Unzval[]. */
850 int_t *xsup,
851 gridinfo_t *grid,
852 zLocalLU_t *Llu,
853 MPI_Request send_req[], /* input/output */
854 SuperLUStat_t *stat
855 , xtrsTimer_t *xtrsTimer);
856
857extern int_t
858pzReDistribute3d_B_to_X (doublecomplex *B, int_t m_loc, int nrhs, int_t ldb,
859 int_t fst_row, int_t * ilsum, doublecomplex *x,
860 zScalePermstruct_t * ScalePermstruct,
861 Glu_persist_t * Glu_persist,
862 gridinfo3d_t * grid3d, zSOLVEstruct_t * SOLVEstruct);
863
864
865extern int_t
867 int_t fst_row, int nrhs, doublecomplex *x, int_t * ilsum,
868 zScalePermstruct_t * ScalePermstruct,
869 Glu_persist_t * Glu_persist, gridinfo3d_t * grid3d,
870 zSOLVEstruct_t * SOLVEstruct);
871
872extern void
874 zScalePermstruct_t * ScalePermstruct,
875 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *B,
876 int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
877 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, int *info);
878
879extern void
881 zScalePermstruct_t * ScalePermstruct,
882 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *B,
883 int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
884 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, int *info);
885
886extern int_t pzgsTrBackSolve3d(superlu_dist_options_t *options, int_t n, zLUstruct_t * LUstruct,
887 zScalePermstruct_t * ScalePermstruct,
888 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
889 doublecomplex *x3d, doublecomplex *lsum3d,
890 zxT_struct *xT_s,
891 doublecomplex * recvbuf,
892 MPI_Request * send_req, int nrhs,
893 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
894
896 zScalePermstruct_t * ScalePermstruct,
897 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
898 doublecomplex *x3d, doublecomplex *lsum3d,
899 zxT_struct *xT_s,
900 doublecomplex * recvbuf,
901 MPI_Request * send_req, int nrhs,
902 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
903
905 zScalePermstruct_t * ScalePermstruct,
906 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
907 doublecomplex *x3d, doublecomplex *lsum3d,
908 doublecomplex * recvbuf,
909 MPI_Request * send_req, int nrhs,
910 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
911
913 ztrf3Dpartition_t* trf3Dpartition, gridinfo3d_t *grid3d,
914 doublecomplex *x3d, doublecomplex *lsum3d,
915 doublecomplex * recvbuf,
916 MPI_Request * send_req, int nrhs,
917 zSOLVEstruct_t * SOLVEstruct, SuperLUStat_t * stat, xtrsTimer_t *xtrsTimer);
918
920 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT );
921
922extern int_t zlocalSolveXkYk( trtype_t trtype, int_t k, doublecomplex* x, int nrhs,
923 zLUstruct_t * LUstruct, gridinfo_t * grid,
924 SuperLUStat_t * stat);
925
926extern int_t ziBcastXk2Pck(int_t k, doublecomplex* x, int nrhs,
927 int** sendList, MPI_Request *send_req,
928 zLUstruct_t * LUstruct, gridinfo_t * grid,xtrsTimer_t *xtrsTimer);
929
930extern int_t ztrs_B_init3d(int_t nsupers, doublecomplex* x, int nrhs, zLUstruct_t * LUstruct, gridinfo3d_t *grid3d);
931extern int_t ztrs_X_gather3d(doublecomplex* x, int nrhs, ztrf3Dpartition_t* trf3Dpartition,
932 zLUstruct_t* LUstruct,
933 gridinfo3d_t* grid3d, xtrsTimer_t *xtrsTimer);
934extern int_t zfsolveReduceLsum3d(int_t treeId, int_t sender, int_t receiver, doublecomplex* lsum, doublecomplex* recvbuf, int nrhs,
935 ztrf3Dpartition_t* trf3Dpartition, zLUstruct_t* LUstruct,
936 gridinfo3d_t* grid3d,xtrsTimer_t *xtrsTimer);
937
938extern int_t zbsolve_Xt_bcast(int_t ilvl, zxT_struct *xT_s, int nrhs, ztrf3Dpartition_t* trf3Dpartition,
939 zLUstruct_t * LUstruct,gridinfo3d_t* grid3d , xtrsTimer_t *xtrsTimer);
940
941extern int_t zp2pSolvedX3d(int_t treeId, int_t sender, int_t receiver, doublecomplex* x, int nrhs,
942 ztrf3Dpartition_t* trf3Dpartition, zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, xtrsTimer_t *xtrsTimer);
943
944/* Memory-related */
947extern double *doubleMalloc_dist(int_t);
948extern double *doubleCalloc_dist(int_t);
949extern void *zuser_malloc_dist (int_t, int_t);
950extern void zuser_free_dist (int_t, int_t);
953
954/* Auxiliary routines */
955
961extern void zZeroLblocks(int, int, gridinfo_t *, zLUstruct_t *);
962extern void zZeroUblocks(int iam, int n, gridinfo_t *, zLUstruct_t *);
963extern double zMaxAbsLij(int iam, int n, Glu_persist_t *,
965extern double zMaxAbsUij(int iam, int n, Glu_persist_t *,
970extern void pzinf_norm_error(int, int_t, int_t, doublecomplex [], int_t,
971 doublecomplex [], int_t , MPI_Comm);
972extern void zreadhb_dist (int, FILE *, int_t *, int_t *, int_t *,
973 doublecomplex **, int_t **, int_t **);
974extern void zreadtriple_dist(FILE *, int_t *, int_t *, int_t *,
975 doublecomplex **, int_t **, int_t **);
976extern void zreadtriple_noheader(FILE *, int_t *, int_t *, int_t *,
977 doublecomplex **, int_t **, int_t **);
978extern void zreadrb_dist(int, FILE *, int_t *, int_t *, int_t *,
979 doublecomplex **, int_t **, int_t **);
980extern void zreadMM_dist(FILE *, int_t *, int_t *, int_t *,
981 doublecomplex **, int_t **, int_t **);
982extern int zread_binary(FILE *, int_t *, int_t *, int_t *,
983 doublecomplex **, int_t **, int_t **);
984
986 int ldb, int nrhs, gridinfo3d_t *, int *info);
989 SuperLUStat_t *, gridinfo_t *, int *rowequ, int *colequ, int *iinfo);
991 zScalePermstruct_t *, zLUstruct_t *LUstruct, int_t m, int_t n,
993 int job, int Equil, int *rowequ, int *colequ, int *iinfo);
994extern double zcomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *);
996 int_t n, zLUstruct_t *, zScalePermstruct_t * ScalePermstruct,
997 int* supernodeMask, gridinfo_t *, SuperLUStat_t *);
998
999/* Distribute the data for numerical factorization */
1002 zLUstruct_t *, gridinfo_t *);
1004
1006
1007/* Routines for debugging */
1008extern void zPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
1009 zLocalLU_t *);
1010extern void zPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
1011 zLocalLU_t *);
1016extern void PrintDoublecomplex(char *, int_t, doublecomplex *);
1017extern int file_PrintDoublecomplex(FILE *fp, char *, int_t, doublecomplex *);
1018
1025
1026/* multi-GPU */
1027#ifdef GPU_ACC
1028// extern void create_nv_buffer(int* , int*, int* , int* );
1029extern void nv_init_wrapper(MPI_Comm);
1030// extern void nv_init_wrapper(int* c, char *v[], int* omp_mpi_level);
1031extern void zprepare_multiGPU_buffers(int,int,int,int,int,int);
1033#endif
1034
1035/* BLAS */
1036
1037#ifdef USE_VENDOR_BLAS
1038extern void zgemm_(const char*, const char*, const int*, const int*, const int*,
1039 const doublecomplex*, const doublecomplex*, const int*, const doublecomplex*,
1040 const int*, const doublecomplex*, doublecomplex*, const int*, int, int);
1041extern void ztrsv_(char*, char*, char*, int*, doublecomplex*, int*,
1042 doublecomplex*, int*, int, int, int);
1043extern void ztrsm_(const char*, const char*, const char*, const char*,
1044 const int*, const int*, const doublecomplex*, const doublecomplex*, const int*,
1045 doublecomplex*, const int*, int, int, int, int);
1046extern void zgemv_(const char *, const int *, const int *, const doublecomplex *,
1047 const doublecomplex *a, const int *, const doublecomplex *, const int *,
1048 const doublecomplex *, doublecomplex *, const int *, int);
1049
1050#else
1051extern int zgemm_(const char*, const char*, const int*, const int*, const int*,
1052 const doublecomplex*, const doublecomplex*, const int*, const doublecomplex*,
1053 const int*, const doublecomplex*, doublecomplex*, const int*);
1054extern int ztrsv_(char*, char*, char*, int*, doublecomplex*, int*,
1055 doublecomplex*, int*);
1056extern int ztrsm_(const char*, const char*, const char*, const char*,
1057 const int*, const int*, const doublecomplex*, const doublecomplex*, const int*,
1058 doublecomplex*, const int*);
1059extern void zgemv_(const char *, const int *, const int *, const doublecomplex *,
1060 const doublecomplex *a, const int *, const doublecomplex *, const int *,
1061 const doublecomplex *, doublecomplex *, const int *);
1062#endif
1063
1064extern void zgeru_(const int*, const int*, const doublecomplex*,
1065 const doublecomplex*, const int*, const doublecomplex*, const int*,
1066 doublecomplex*, const int*);
1067
1068extern int zscal_(const int *n, const doublecomplex *alpha, doublecomplex *dx, const int *incx);
1069extern int zaxpy_(const int *n, const doublecomplex *alpha, const doublecomplex *x,
1070 const int *incx, doublecomplex *y, const int *incy);
1071
1072/* SuperLU BLAS interface: zsuperlu_blas.c */
1073extern int superlu_zgemm(const char *transa, const char *transb,
1074 int m, int n, int k, doublecomplex alpha, doublecomplex *a,
1075 int lda, doublecomplex *b, int ldb, doublecomplex beta, doublecomplex *c, int ldc);
1076extern int superlu_ztrsm(const char *sideRL, const char *uplo,
1077 const char *transa, const char *diag, const int m, const int n,
1078 const doublecomplex alpha, const doublecomplex *a,
1079 const int lda, doublecomplex *b, const int ldb);
1080extern int superlu_zger(const int m, const int n, const doublecomplex alpha,
1081 const doublecomplex *x, const int incx, const doublecomplex *y,
1082 const int incy, doublecomplex *a, const int lda);
1083extern int superlu_zscal(const int n, const doublecomplex alpha, doublecomplex *x, const int incx);
1084extern int superlu_zaxpy(const int n, const doublecomplex alpha,
1085 const doublecomplex *x, const int incx, doublecomplex *y, const int incy);
1086extern int superlu_zgemv(const char *trans, const int m,
1087 const int n, const doublecomplex alpha, const doublecomplex *a,
1088 const int lda, const doublecomplex *x, const int incx,
1089 const doublecomplex beta, doublecomplex *y, const int incy);
1090extern int superlu_ztrsv(char *uplo, char *trans, char *diag,
1091 int n, doublecomplex *a, int lda, doublecomplex *x, int incx);
1092
1093#ifdef SLU_HAVE_LAPACK
1094extern void ztrtri_(char*, char*, int*, doublecomplex*, int*, int*);
1095#endif
1096
1097/*==== For 3D code ====*/
1098extern int zcreate_matrix3d(SuperMatrix *A, int nrhs, doublecomplex **rhs,
1099 int *ldb, doublecomplex **x, int *ldx,
1100 FILE *fp, gridinfo3d_t *grid3d);
1101extern int zcreate_matrix_postfix3d(SuperMatrix *A, int nrhs, doublecomplex **rhs,
1102 int *ldb, doublecomplex **x, int *ldx,
1103 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
1104extern int zcreate_block_diag_3d(SuperMatrix *A, int batchCount, int nrhs, doublecomplex **rhs,
1105 int *ldb, doublecomplex **x, int *ldx,
1106 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
1107extern int zcreate_batch_systems(handle_t *SparseMatrix_handles, int batchCount,
1108 int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx,
1109 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
1110
1111/* Matrix distributed in NRformat_loc in 3D process grid. It converts
1112 it to a NRformat_loc distributed in 2D grid in grid-0 */
1114 int ldb, int nrhs, gridinfo3d_t *grid3d,
1115 NRformat_loc3d **);
1117 int ldb, int nrhs, gridinfo3d_t *grid3d,
1118 NRformat_loc3d **);
1119extern int zScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d);
1120
1122 zScalePermstruct_t *, doublecomplex B[], int ldb, int nrhs,
1124 double *berr, SuperLUStat_t *, int *info);
1125extern int_t pzgstrf3d(superlu_dist_options_t *, int m, int n, double anorm,
1127 gridinfo3d_t *, SuperLUStat_t *, int *);
1128extern void zInit_HyP(superlu_dist_options_t *, HyP_t* HyP, zLocalLU_t *Llu, int_t mcb, int_t mrb);
1129extern void Free_HyP(HyP_t* HyP);
1130extern int updateDirtyBit(int_t k0, HyP_t* HyP, gridinfo_t* grid);
1131
1132 /* from scatter.h */
1133extern void
1135 Remain_info_t *Remain_info, doublecomplex *L_mat, int ldl,
1136 doublecomplex *U_mat, int ldu, doublecomplex *bigV,
1137 // int_t jj0,
1138 int_t knsupc, int_t klst,
1139 int_t *lsub, int_t *usub, int_t ldt,
1140 int_t thread_id,
1141 int *indirect, int *indirect2,
1142 int_t **Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr,
1143 int_t **Ufstnz_br_ptr, doublecomplex **Unzval_br_ptr,
1144 int_t *xsup, gridinfo_t *, SuperLUStat_t *
1145#ifdef SCATTER_PROFILE
1146 , double *Host_TheadScatterMOP, double *Host_TheadScatterTimer
1147#endif
1148 );
1149
1150#ifdef _OPENMP
1151/*this version uses a lock to prevent multiple thread updating the same block*/
1152extern void
1153zblock_gemm_scatter_lock( int_t lb, int_t j, omp_lock_t* lock,
1154 Ublock_info_t *Ublock_info, Remain_info_t *Remain_info,
1155 doublecomplex *L_mat, int_t ldl, doublecomplex *U_mat, int_t ldu,
1156 doublecomplex *bigV,
1157 // int_t jj0,
1158 int_t knsupc, int_t klst,
1159 int_t *lsub, int_t *usub, int_t ldt,
1160 int_t thread_id,
1161 int *indirect, int *indirect2,
1162 int_t **Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr,
1163 int_t **Ufstnz_br_ptr, doublecomplex **Unzval_br_ptr,
1164 int_t *xsup, gridinfo_t *
1165#ifdef SCATTER_PROFILE
1166 , double *Host_TheadScatterMOP, double *Host_TheadScatterTimer
1167#endif
1168 );
1169#endif
1170
1171extern int_t
1173 int_t knsupc, int_t klst, int_t* lsub,
1174 int_t * usub, int_t ldt,
1175 int* indirect, int* indirect2,
1176 HyP_t* HyP, zLUstruct_t *, gridinfo_t*,
1177 SCT_t*SCT, SuperLUStat_t *
1178 );
1179extern int_t
1181 int_t knsupc, int_t klst, int_t* lsub,
1182 int_t * usub, int_t ldt,
1183 int* indirect, int* indirect2,
1184 HyP_t* HyP, zLUstruct_t *, gridinfo_t*,
1185 SCT_t*SCT, SuperLUStat_t * );
1186extern int_t
1188 int_t knsupc, int_t klst, int_t* lsub,
1189 int_t * usub, int_t ldt,
1190 int* indirect, int* indirect2,
1191 HyP_t* HyP, zLUstruct_t *, gridinfo_t*,
1192 SCT_t*SCT, SuperLUStat_t * );
1193extern int_t
1195 int_t knsupc, int_t klst, int_t* lsub,
1196 int_t * usub, int_t ldt,
1197 int* indirect, int* indirect2,
1198 HyP_t* HyP, zLUstruct_t *, gridinfo_t*,
1199 SCT_t*SCT, SuperLUStat_t * );
1200
1201 /* from gather.h */
1202extern void zgather_u(int_t num_u_blks,
1203 Ublock_info_t *Ublock_info, int_t * usub,
1204 doublecomplex *uval, doublecomplex *bigU, int_t ldu,
1205 int_t *xsup, int_t klst /* for SuperSize */
1206 );
1207
1208extern void zgather_l( int_t num_LBlk, int_t knsupc,
1209 Remain_info_t *L_info,
1210 doublecomplex * lval, int_t LD_lval,
1211 doublecomplex * L_buff );
1212
1215 int_t *myIperm, int_t *iperm_c_supno );
1216extern void zRgather_U(int_t k, int_t jj0, int_t *usub, doublecomplex *uval,
1218 gridinfo_t *, HyP_t *, int_t *myIperm,
1219 int_t *iperm_c_supno, int_t *perm_u);
1220
1221 /* from pxdistribute3d.h */
1222extern void zbcastPermutedSparseA(SuperMatrix *A,
1223 zScalePermstruct_t *ScalePermstruct,
1224 Glu_freeable_t *Glu_freeable,
1225 zLUstruct_t *LUstruct, gridinfo3d_t *grid3d);
1226
1227extern void znewTrfPartitionInit(int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d);
1228
1229
1230 /* from xtrf3Dpartition.h */
1232 superlu_dist_options_t *options,
1233 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1235 superlu_dist_options_t *options,
1236 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1238 superlu_dist_options_t *options,
1239 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1240extern void zDestroy_trf3Dpartition(ztrf3Dpartition_t *trf3Dpartition);
1241
1242extern void z3D_printMemUse(ztrf3Dpartition_t* trf3Dpartition,
1243 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
1244
1245//extern int* getLastDep(gridinfo_t *grid, SuperLUStat_t *stat,
1246// superlu_dist_options_t *options, zLocalLU_t *Llu,
1247// int_t* xsup, int_t num_look_aheads, int_t nsupers,
1248// int_t * iperm_c_supno);
1249
1250extern void zinit3DLUstructForest( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
1251 sForest_t** sForests, zLUstruct_t* LUstruct,
1252 gridinfo3d_t* grid3d);
1253
1254extern int_t zgatherAllFactoredLUFr(int_t* myZeroTrIdxs, sForest_t* sForests,
1255 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d,
1256 SCT_t* SCT );
1257
1258 /* The following are from pdgstrf2.h */
1259extern int_t zLpanelUpdate(int_t off0, int_t nsupc, doublecomplex* ublk_ptr,
1260 int_t ld_ujrow, doublecomplex* lusup, int_t nsupr, SCT_t*);
1261extern void zgstrf2(int_t k, doublecomplex* diagBlk, int_t LDA, doublecomplex* BlockUfactor, int_t LDU,
1262 double thresh, int_t* xsup, superlu_dist_options_t *options,
1263 SuperLUStat_t *stat, int *info);
1265 double thresh, doublecomplex *BlockUFactor, Glu_persist_t *,
1267 SuperLUStat_t *, int *info, SCT_t*);
1268extern int_t zTrs2_GatherU(int_t iukp, int_t rukp, int_t klst,
1269 int_t nsupc, int_t ldu, int_t *usub,
1270 doublecomplex* uval, doublecomplex *tempv);
1271extern int_t zTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst,
1272 int_t nsupc, int_t ldu, int_t *usub,
1273 doublecomplex* uval, doublecomplex *tempv);
1274extern int_t zTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp,
1275 int_t *usub, doublecomplex* uval, doublecomplex *tempv,
1276 int_t knsupc, int nsupr, doublecomplex* lusup,
1277 Glu_persist_t *Glu_persist) ;
1278extern void pzgstrs2
1279#ifdef _CRAY
1280(
1281 int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid,
1282 zLocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3
1283);
1284#else
1285(
1286 int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid,
1287 zLocalLU_t *Llu, SuperLUStat_t *stat
1288);
1289#endif
1290
1291extern void pzgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0,
1292 int_t k, double thresh, Glu_persist_t *, gridinfo_t *,
1293 zLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *);
1294
1295 /* from p3dcomm.h */
1296extern int_t zAllocLlu_3d(int_t nsupers, zLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1297extern int_t zp3dScatter(int_t n, zLUstruct_t * LUstruct, gridinfo3d_t* grid3d, int *supernodeMask);
1299 zLUstruct_t * LUstruct, gridinfo3d_t* grid3d, int *supernodeMask);
1301 zLUstruct_t * LUstruct, gridinfo3d_t* grid3d, int *supernodeMask);
1302extern int_t zcollect3dLpanels(int_t layer, int_t nsupers, zLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1303extern int_t zcollect3dUpanels(int_t layer, int_t nsupers, zLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1304extern int_t zp3dCollect(int_t layer, int_t n, zLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
1305/*zero out LU non zero entries*/
1306extern int_t zzeroSetLU(int_t nnodes, int_t* nodeList , zLUstruct_t *, gridinfo3d_t*);
1307extern int zAllocGlu_3d(int_t n, int_t nsupers, zLUstruct_t *);
1308extern int zDeAllocLlu_3d(int_t n, zLUstruct_t *, gridinfo3d_t*);
1309extern int zDeAllocGlu_3d(zLUstruct_t *);
1310
1311/* Reduces L and U panels of nodes in the List nodeList (size=nnnodes)
1312receiver[L(nodelist)] =sender[L(nodelist)] +receiver[L(nodelist)]
1313receiver[U(nodelist)] =sender[U(nodelist)] +receiver[U(nodelist)]
1314*/
1316 int_t nnodes, int_t* nodeList,
1317 doublecomplex* Lval_buf, doublecomplex* Uval_buf,
1318 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1319/*reduces all nodelists required in a level*/
1320extern int zreduceAllAncestors3d(int_t ilvl, int_t* myNodeCount,
1321 int_t** treePerm,
1322 zLUValSubBuf_t* LUvsb,
1323 zLUstruct_t* LUstruct,
1324 gridinfo3d_t* grid3d,
1325 SCT_t* SCT );
1326/*
1327 Copies factored L and U panels from sender grid to receiver grid
1328 receiver[L(nodelist)] <-- sender[L(nodelist)];
1329 receiver[U(nodelist)] <-- sender[U(nodelist)];
1330*/
1332 int_t nnodes, int_t *nodeList, zLUValSubBuf_t* LUvsb,
1333 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d,SCT_t* SCT );
1334
1335/*Gathers all the L and U factors to grid 0 for solve stage
1336 By repeatidly calling above function*/
1338 gridinfo3d_t* grid3d, SCT_t* SCT );
1339
1340/*Distributes data in each layer and initilizes ancestors
1341 as zero in required nodes*/
1342int_t zinit3DLUstruct( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
1343 int_t* nodeCount, int_t** nodeList,
1344 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d);
1345
1347 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1349 doublecomplex beta, doublecomplex* Lval_buf,
1350 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1352 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1354 doublecomplex beta, doublecomplex* Uval_buf,
1355 zLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
1356
1357 /* from communication_aux.h */
1359 gridinfo_t *, int* msgcnt, MPI_Request *,
1360 int **ToSendR, int_t *xsup, int );
1362 gridinfo_t *, int* msgcnt, int **ToSendR,
1363 int_t *xsup , SCT_t*, int);
1365 gridinfo_t *, int* msgcnt, MPI_Request *,
1366 int *ToSendD, int );
1368 gridinfo_t *, int* msgcnt, int *ToSendD, SCT_t*, int);
1369extern int_t zIrecv_LPanel (int_t k, int_t k0, int_t* Lsub_buf,
1370 doublecomplex* Lval_buf, gridinfo_t *,
1371 MPI_Request *, zLocalLU_t *, int);
1373 zLocalLU_t *, gridinfo_t*, MPI_Request *, int);
1374extern int_t zWait_URecv(MPI_Request *, int* msgcnt, SCT_t *);
1375extern int_t zWait_LRecv(MPI_Request*, int* msgcnt, int* msgcntsU,
1376 gridinfo_t *, SCT_t*);
1378 MPI_Request *, gridinfo_t *, int);
1380 int_t src, gridinfo_t *, SCT_t*, int);
1382 gridinfo_t *, zLocalLU_t *);
1384 MPI_Request *, gridinfo_t *, int);
1386 int_t src, MPI_Request *, gridinfo_t *,
1387 SCT_t*, int);
1389 int_t src, MPI_Request *, gridinfo_t*, SCT_t*, int);
1390extern int_t zUDiagBlockRecvWait( int_t k, int* IrecvPlcd_D, int* factored_L,
1391 MPI_Request *, gridinfo_t *, zLUstruct_t *, SCT_t *);
1392
1393#if (MPI_VERSION>2)
1394extern int_t zIBcast_UDiagBlock(int_t k, doublecomplex *ublk_ptr, int_t size,
1395 MPI_Request *, gridinfo_t *);
1396extern int_t zIBcast_LDiagBlock(int_t k, doublecomplex *lblk_ptr, int_t size,
1397 MPI_Request *, gridinfo_t *);
1398#endif
1399
1400 /* from trfCommWrapper.h */
1402 doublecomplex *BlockUFactor, doublecomplex *BlockLFactor,
1403 int* IrecvPlcd_D, MPI_Request *, MPI_Request *,
1404 MPI_Request *, MPI_Request *, gridinfo_t *,
1405 superlu_dist_options_t *, double thresh,
1406 zLUstruct_t *LUstruct, SuperLUStat_t *, int *info,
1407 SCT_t *, int tag_ub);
1408extern int_t zUPanelTrSolve( int_t k, doublecomplex* BlockLFactor, doublecomplex* bigV,
1411extern int_t zLPanelUpdate(int_t k, int* IrecvPlcd_D, int* factored_L,
1412 MPI_Request *, doublecomplex* BlockUFactor, gridinfo_t *,
1413 zLUstruct_t *, SCT_t *);
1414extern int_t zUPanelUpdate(int_t k, int* factored_U, MPI_Request *,
1415 doublecomplex* BlockLFactor, doublecomplex* bigV,
1418extern int_t zIBcastRecvLPanel(int_t k, int_t k0, int* msgcnt,
1419 MPI_Request *, MPI_Request *,
1420 int_t* Lsub_buf, doublecomplex* Lval_buf,
1421 int * factored, gridinfo_t *, zLUstruct_t *,
1422 SCT_t *, int tag_ub);
1423extern int_t zIBcastRecvUPanel(int_t k, int_t k0, int* msgcnt, MPI_Request *,
1424 MPI_Request *, int_t* Usub_buf, doublecomplex* Uval_buf,
1425 gridinfo_t *, zLUstruct_t *, SCT_t *, int tag_ub);
1426extern int_t zWaitL(int_t k, int* msgcnt, int* msgcntU, MPI_Request *,
1427 MPI_Request *, gridinfo_t *, zLUstruct_t *, SCT_t *);
1428extern int_t zWaitU(int_t k, int* msgcnt, MPI_Request *, MPI_Request *,
1429 gridinfo_t *, zLUstruct_t *, SCT_t *);
1430extern int_t zLPanelTrSolve(int_t k, int* factored_L, doublecomplex* BlockUFactor,
1431 gridinfo_t *, zLUstruct_t *);
1432
1433 /* from trfAux.h */
1434extern int getNsupers(int, Glu_persist_t *);
1435extern int_t initPackLUInfo(int_t nsupers, packLUInfo_t* packLUInfo);
1436extern int freePackLUInfo(packLUInfo_t* packLUInfo);
1439 lPanelInfo_t *, int_t*, int_t *, int_t *,
1440 doublecomplex *bigU, int_t* Lsub_buf,
1441 doublecomplex* Lval_buf, int_t* Usub_buf,
1442 doublecomplex* Uval_buf, gridinfo_t *, zLUstruct_t *);
1446 zLUValSubBuf_t* LUvsb, gridinfo_t *,
1447 zLUstruct_t *, HyP_t*);
1451// permutation from superLU default
1452
1453 /* from treeFactorization.h */
1456 int_t ldt, int_t num_threads, int_t nsupers,
1458extern int zfreeScuBufs(zscuBufs_t* scuBufs);
1459
1460#if 0 // NOT CALLED
1461// the generic tree factoring code
1462extern int_t treeFactor(
1463 int_t nnnodes, // number of nodes in the tree
1464 int_t *perm_c_supno, // list of nodes in the order of factorization
1465 commRequests_t *comReqs, // lists of communication requests
1466 zscuBufs_t *scuBufs, // contains buffers for schur complement update
1467 packLUInfo_t*packLUInfo,
1468 msgs_t*msgs,
1469 zLUValSubBuf_t* LUvsb,
1470 zdiagFactBufs_t *dFBuf,
1471 factStat_t *factStat,
1472 factNodelists_t *fNlists,
1473 superlu_dist_options_t *options,
1474 int_t * gIperm_c_supno,
1475 int_t ldt,
1476 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1477 double thresh, SCT_t *SCT,
1478 int *info
1479);
1480#endif
1481
1483 int_t nnodes, // number of nodes in the tree
1484 int_t *perm_c_supno, // list of nodes in the order of factorization
1485 treeTopoInfo_t* treeTopoInfo,
1486 commRequests_t *comReqs, // lists of communication requests
1487 zscuBufs_t *scuBufs, // contains buffers for schur complement update
1488 packLUInfo_t*packLUInfo,
1489 msgs_t*msgs,
1490 zLUValSubBuf_t* LUvsb,
1491 zdiagFactBufs_t *dFBuf,
1492 factStat_t *factStat,
1493 factNodelists_t *fNlists,
1494 superlu_dist_options_t *options,
1495 int_t * gIperm_c_supno,
1496 int_t ldt,
1497 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1498 double thresh, SCT_t *SCT,
1499 int *info
1500);
1501
1503 int_t nnnodes, // number of nodes in the tree
1504 int_t *perm_c_supno, // list of nodes in the order of factorization
1505 commRequests_t *comReqs, // lists of communication requests
1506 zscuBufs_t *scuBufs, // contains buffers for schur complement update
1507 packLUInfo_t*packLUInfo,
1508 msgs_t*msgs,
1509 zLUValSubBuf_t* LUvsb,
1510 zdiagFactBufs_t *dFBuf,
1511 factStat_t *factStat,
1512 factNodelists_t *fNlists,
1513 superlu_dist_options_t *options,
1514 int_t * gIperm_c_supno,
1515 int_t ldt,
1516 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1517 double thresh, SCT_t *SCT, int tag_ub,
1518 int *info
1519);
1520
1522 sForest_t* sforest,
1523 commRequests_t **comReqss, // lists of communication requests // size maxEtree level
1524 zscuBufs_t *scuBufs, // contains buffers for schur complement update
1525 packLUInfo_t*packLUInfo,
1526 msgs_t**msgss, // size=num Look ahead
1527 zLUValSubBuf_t** LUvsbs, // size=num Look ahead
1528 zdiagFactBufs_t **dFBufs, // size maxEtree level
1529 factStat_t *factStat,
1530 factNodelists_t *fNlists,
1531 gEtreeInfo_t* gEtreeInfo, // global etree info
1532 superlu_dist_options_t *options,
1533 int_t * gIperm_c_supno,
1534 int_t ldt,
1535 HyP_t* HyP,
1536 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1537 double thresh, SCT_t *SCT, int tag_ub,
1538 int *info
1539);
1541extern int zLluBufFreeArr(int_t numLA, zLUValSubBuf_t **LUvsbs);
1542extern zdiagFactBufs_t** zinitDiagFactBufsArr(int mxLeafNode, int ldt, gridinfo_t* grid);
1543extern zdiagFactBufs_t** zinitDiagFactBufsArrMod(int mxLeafNode, int* ldts, gridinfo_t* grid);
1544extern int zfreeDiagFactBufsArr(int mxLeafNode, zdiagFactBufs_t** dFBufs);
1545extern int zinitDiagFactBufs(int ldt, zdiagFactBufs_t* dFBuf);
1546extern int_t checkRecvUDiag(int_t k, commRequests_t *comReqs,
1547 gridinfo_t *grid, SCT_t *SCT);
1548extern int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *);
1549
1550
1551extern int pzflatten_LDATA(superlu_dist_options_t *options, int_t n, zLUstruct_t * LUstruct,
1552 gridinfo_t *grid, SuperLUStat_t * stat);
1554 zLUstruct_t *, SuperLUStat_t *, int n);
1556 zLUstruct_t *, SuperLUStat_t *, int n);
1557
1558extern int_t
1560 Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno,
1561 gridinfo_t *grid, int_t *colptr[], int_t *rowind[],
1562 doublecomplex *a[]);
1563extern float
1565 zScalePermstruct_t *ScalePermstruct,
1566 Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct,
1567 gridinfo3d_t *grid3d);
1568
1569
1570#if 0 // NOT CALLED
1571/* from ancFactorization.h (not called) */
1572extern int_t ancestorFactor(
1573 int_t ilvl, // level of factorization
1574 sForest_t* sforest,
1575 commRequests_t **comReqss, // lists of communication requests // size maxEtree level
1576 zscuBufs_t *scuBufs, // contains buffers for schur complement update
1577 packLUInfo_t*packLUInfo,
1578 msgs_t**msgss, // size=num Look ahead
1579 zLUValSubBuf_t** LUvsbs, // size=num Look ahead
1580 zdiagFactBufs_t **dFBufs, // size maxEtree level
1581 factStat_t *factStat,
1582 factNodelists_t *fNlists,
1583 gEtreeInfo_t* gEtreeInfo, // global etree info
1584 superlu_dist_options_t *options,
1585 int_t * gIperm_c_supno,
1586 int_t ldt,
1587 HyP_t* HyP,
1588 zLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1589 double thresh, SCT_t *SCT, int tag_ub, int *info
1590);
1591#endif
1592
1593 /* Batch interface */
1594extern int pzgssvx3d_csc_batch(
1595 superlu_dist_options_t *, int batchCount, int m, int n, int nnz,
1596 int nrhs, handle_t *, doublecomplex **RHSptr, int *ldRHS,
1597 double **ReqPtr, double **CeqPtr,
1598 int **RpivPtr, int **CpivPtr, DiagScale_t *DiagScale,
1599 handle_t *F, doublecomplex **Xptr, int *ldX, double **Berrs,
1600 gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info
1601 //DeviceContext context /* device context including queues, events, dependencies */
1602 );
1603extern int zequil_batch(
1604 superlu_dist_options_t *, int batchCount, int m, int n, handle_t *,
1605 double **ReqPtr, double **CeqPtr, DiagScale_t *
1606 // DeviceContext context /* device context including queues, events, dependencies */
1607 );
1608extern int zpivot_batch(
1609 superlu_dist_options_t *, int batchCount, int m, int n, handle_t *,
1610 double **ReqPtr, double **CeqPtr, DiagScale_t *, int **RpivPtr
1611 // DeviceContext context /* device context including queues, events, dependencies */
1612 );
1613
1614
1615extern int zwriteLUtoDisk(int nsupers, int_t *xsup, zLUstruct_t *LUstruct);
1616extern int zcheckArr(doublecomplex *A, doublecomplex *B, int n);
1617extern int zcheckLUFromDisk(int nsupers, int_t *xsup, zLUstruct_t *LUstruct);
1618extern void zDumpLblocks3D(int_t nsupers, gridinfo3d_t *grid3d,
1619 Glu_persist_t *Glu_persist, zLocalLU_t *Llu);
1620extern void zDumpLblocks3D(int_t nsupers, gridinfo3d_t *grid3d,
1621 Glu_persist_t *Glu_persist, zLocalLU_t *Llu);
1622
1623/*== end 3D prototypes ===================*/
1624
1625extern doublecomplex *zready_x;
1627
1628#ifdef __cplusplus
1629 }
1630#endif
1631
1632#endif /* __SUPERLU_dDEFS */
1633
Header for dcomplex.c.
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public trans
Definition: superlupara.f90:35
integer, parameter, public factored
Definition: superlupara.f90:35
integer, parameter, public lusup
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:1256
Definition: superlu_defs.h:506
Definition: superlu_defs.h:451
Definition: superlu_defs.h:854
Definition: supermatrix.h:194
Definition: supermatrix.h:176
Definition: psymbfact.h:57
Definition: superlu_defs.h:799
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: supermatrix.h:54
Definition: superlu_defs.h:789
Definition: superlu_defs.h:781
Definition: superlu_defs.h:1012
Definition: dcomplex.h:30
Definition: superlu_defs.h:1025
Definition: superlu_defs.h:927
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:834
Definition: superlu_defs.h:1034
Definition: superlu_defs.h:844
Definition: superlu_defs.h:567
Definition: superlu_zdefs.h:350
void * val_tosend
Definition: superlu_zdefs.h:362
int_t * ptr_ind_torecv
Definition: superlu_zdefs.h:356
void * val_torecv
Definition: superlu_zdefs.h:363
int_t * extern_start
Definition: superlu_zdefs.h:351
int_t TotalValSend
Definition: superlu_zdefs.h:366
int_t * ind_torecv
Definition: superlu_zdefs.h:353
int * SendCounts
Definition: superlu_zdefs.h:358
int * RecvCounts
Definition: superlu_zdefs.h:360
int_t * ptr_ind_tosend
Definition: superlu_zdefs.h:354
int_t * ind_tosend
Definition: superlu_zdefs.h:352
int_t TotalIndSend
Definition: superlu_zdefs.h:364
Definition: superlu_defs.h:989
Definition: superlu_defs.h:773
Definition: superlu_defs.h:728
Definition: superlu_defs.h:970
Definition: superlu_defs.h:825
Definition: superlu_defs.h:1040
Definition: superlu_zdefs.h:310
int_t * Lsub_buf
Definition: superlu_zdefs.h:311
doublecomplex * Uval_buf
Definition: superlu_zdefs.h:314
int_t * Usub_buf
Definition: superlu_zdefs.h:313
doublecomplex * Lval_buf
Definition: superlu_zdefs.h:312
Definition: superlu_zdefs.h:340
zLocalLU_t * Llu
Definition: superlu_zdefs.h:343
int_t * etree
Definition: superlu_zdefs.h:341
ztrf3Dpartition_t * trf3Dpart
Definition: superlu_zdefs.h:344
char dt
Definition: superlu_zdefs.h:345
Glu_persist_t * Glu_persist
Definition: superlu_zdefs.h:342
Definition: superlu_zdefs.h:97
long int Uinv_bc_cnt
Definition: superlu_zdefs.h:162
int_t * d_Lrowind_bc_dat
Definition: superlu_zdefs.h:262
long int * Unzval_br_offset
Definition: superlu_zdefs.h:171
int_t ** Ufstnz_br_ptr
Definition: superlu_zdefs.h:164
int64_t * Ucolind_br_offset
Definition: superlu_zdefs.h:147
int_t * d_Ucolind_br_dat
Definition: superlu_zdefs.h:272
int_t ** Uind_br_ptr
Definition: superlu_zdefs.h:140
int64_t * Uind_br_offset
Definition: superlu_zdefs.h:142
int_t * d_xsup
Definition: superlu_zdefs.h:299
int64_t Unzval_br_new_cnt
Definition: superlu_zdefs.h:153
gridinfo_t * d_grid
Definition: superlu_zdefs.h:304
int_t n
Definition: superlu_zdefs.h:254
int_t * Uind_br_dat
Definition: superlu_zdefs.h:141
doublecomplex * d_Lnzval_bc_dat
Definition: superlu_zdefs.h:264
int_t * d_Uindval_loc_bc_dat
Definition: superlu_zdefs.h:283
int64_t * Ucolind_bc_offset
Definition: superlu_zdefs.h:127
int64_t * d_Ucolind_bc_offset
Definition: superlu_zdefs.h:267
long int * d_Uinv_bc_offset
Definition: superlu_zdefs.h:280
int_t * ut_modbit
Definition: superlu_zdefs.h:241
doublecomplex * d_Linv_bc_dat
Definition: superlu_zdefs.h:277
int nbrecvx
Definition: superlu_zdefs.h:213
int_t * utrecv
Definition: superlu_zdefs.h:237
int_t * Lindval_loc_bc_dat
Definition: superlu_zdefs.h:120
int_t FRECV
Definition: superlu_zdefs.h:232
int_t * Ufstnz_br_dat
Definition: superlu_zdefs.h:165
doublecomplex ** Unzval_br_new_ptr
Definition: superlu_zdefs.h:150
C_Tree * d_UBtree_ptr
Definition: superlu_zdefs.h:302
int * d_bcols_masked
Definition: superlu_zdefs.h:285
int_t SolveMsgVol
Definition: superlu_zdefs.h:223
int_t ut_ldalsum
Definition: superlu_zdefs.h:233
long int Linv_bc_cnt
Definition: superlu_zdefs.h:114
int_t inv
Definition: superlu_zdefs.h:256
int_t * Ucolind_bc_dat
Definition: superlu_zdefs.h:126
int_t * d_Uind_br_dat
Definition: superlu_zdefs.h:268
int * ToRecv
Definition: superlu_zdefs.h:199
C_Tree * LRtree_ptr
Definition: superlu_zdefs.h:176
doublecomplex ** Unzval_bc_ptr
Definition: superlu_zdefs.h:130
int_t n_utrecvx
Definition: superlu_zdefs.h:239
int nbsendx
Definition: superlu_zdefs.h:214
C_Tree * URtree_ptr
Definition: superlu_zdefs.h:178
int_t * ut_ilsum
Definition: superlu_zdefs.h:234
doublecomplex * ujrow
Definition: superlu_zdefs.h:189
int nroot
Definition: superlu_zdefs.h:217
int_t * Uindval_loc_bc_dat
Definition: superlu_zdefs.h:136
long int Unzval_br_cnt
Definition: superlu_zdefs.h:172
long int * d_Unzval_bc_offset
Definition: superlu_zdefs.h:271
int nfrecvx
Definition: superlu_zdefs.h:208
int * bcols_masked
Definition: superlu_zdefs.h:224
Ucb_indptr_t ** Ucb_indptr
Definition: superlu_zdefs.h:243
int ** fsendx_plist
Definition: superlu_zdefs.h:206
int64_t * d_Uindval_loc_bc_offset
Definition: superlu_zdefs.h:284
int_t nfrecvmod
Definition: superlu_zdefs.h:255
doublecomplex ** Uinv_bc_ptr
Definition: superlu_zdefs.h:159
doublecomplex * d_Unzval_br_new_dat
Definition: superlu_zdefs.h:274
C_Tree * LBtree_ptr
Definition: superlu_zdefs.h:175
int_t ** Lrowind_bc_2_lsum
Definition: superlu_zdefs.h:158
int ** ToSendR
Definition: superlu_zdefs.h:201
int64_t * Unzval_bc_offset
Definition: superlu_zdefs.h:132
doublecomplex * d_Uinv_bc_dat
Definition: superlu_zdefs.h:278
int64_t Uindval_loc_bc_cnt
Definition: superlu_zdefs.h:138
long int Ucb_indcnt
Definition: superlu_zdefs.h:246
int_t ** Lrowind_bc_ptr
Definition: superlu_zdefs.h:98
C_Tree * UBtree_ptr
Definition: superlu_zdefs.h:177
int_t * Ucolind_br_dat
Definition: superlu_zdefs.h:146
int_t ** Uindval_loc_bc_ptr
Definition: superlu_zdefs.h:135
int nfsendx
Definition: superlu_zdefs.h:209
long int * Lnzval_bc_offset
Definition: superlu_zdefs.h:107
int64_t * Uindval_loc_bc_offset
Definition: superlu_zdefs.h:137
int * bmod
Definition: superlu_zdefs.h:210
int64_t * d_Unzval_br_new_offset
Definition: superlu_zdefs.h:275
doublecomplex ** Lnzval_bc_ptr
Definition: superlu_zdefs.h:104
int * ToSendD
Definition: superlu_zdefs.h:200
Ucb_indptr_t * Ucb_inddat
Definition: superlu_zdefs.h:244
int nleaf
Definition: superlu_zdefs.h:216
C_Tree * d_LBtree_ptr
Definition: superlu_zdefs.h:300
int64_t * d_Uind_br_offset
Definition: superlu_zdefs.h:269
long int * Uinv_bc_offset
Definition: superlu_zdefs.h:161
long int * Ufstnz_br_offset
Definition: superlu_zdefs.h:166
long int * Ucb_valoffset
Definition: superlu_zdefs.h:250
long int Ufstnz_br_cnt
Definition: superlu_zdefs.h:167
int_t * Ucb_valdat
Definition: superlu_zdefs.h:249
int64_t * Unzval_br_new_offset
Definition: superlu_zdefs.h:152
int_t ** Ucb_valptr
Definition: superlu_zdefs.h:248
doublecomplex * d_Unzval_bc_dat
Definition: superlu_zdefs.h:270
int64_t * d_Lindval_loc_bc_offset
Definition: superlu_zdefs.h:282
int_t UT_SOLVE
Definition: superlu_zdefs.h:230
int * frecv
Definition: superlu_zdefs.h:207
doublecomplex ** Unzval_br_ptr
Definition: superlu_zdefs.h:169
long int Lnzval_bc_cnt
Definition: superlu_zdefs.h:108
doublecomplex ** Linv_bc_ptr
Definition: superlu_zdefs.h:110
int_t * utmod
Definition: superlu_zdefs.h:235
int_t * ilsum
Definition: superlu_zdefs.h:219
int_t n_utsendx
Definition: superlu_zdefs.h:238
long int Lrowind_bc_cnt
Definition: superlu_zdefs.h:102
doublecomplex * Unzval_bc_dat
Definition: superlu_zdefs.h:131
int_t * d_ilsum
Definition: superlu_zdefs.h:298
long int * Linv_bc_offset
Definition: superlu_zdefs.h:113
long int * Lindval_loc_bc_offset
Definition: superlu_zdefs.h:121
int_t * Unnz
Definition: superlu_zdefs.h:157
int64_t * d_Ucolind_br_offset
Definition: superlu_zdefs.h:273
int_t * Lrowind_bc_dat
Definition: superlu_zdefs.h:100
long int * Lrowind_bc_offset
Definition: superlu_zdefs.h:101
int_t ** ut_sendx_plist
Definition: superlu_zdefs.h:236
int_t * Urbs
Definition: superlu_zdefs.h:242
int_t L_SOLVE
Definition: superlu_zdefs.h:231
long int * d_Lnzval_bc_offset
Definition: superlu_zdefs.h:265
int_t n_utrecvmod
Definition: superlu_zdefs.h:240
int_t ** Ucolind_br_ptr
Definition: superlu_zdefs.h:145
int64_t Unzval_bc_cnt
Definition: superlu_zdefs.h:133
doublecomplex * Linv_bc_dat
Definition: superlu_zdefs.h:112
doublecomplex * Unzval_br_dat
Definition: superlu_zdefs.h:170
int nbcol_masked
Definition: superlu_zdefs.h:257
doublecomplex * Lnzval_bc_dat
Definition: superlu_zdefs.h:106
int ** bsendx_plist
Definition: superlu_zdefs.h:211
doublecomplex * Uinv_bc_dat
Definition: superlu_zdefs.h:160
int * fmod
Definition: superlu_zdefs.h:205
int_t ** Ucolind_bc_ptr
Definition: superlu_zdefs.h:125
int64_t Ucolind_bc_cnt
Definition: superlu_zdefs.h:128
int_t SolveMsgSent
Definition: superlu_zdefs.h:222
long int Ucb_valcnt
Definition: superlu_zdefs.h:251
long int * Ucb_indoffset
Definition: superlu_zdefs.h:245
int_t ** Lindval_loc_bc_ptr
Definition: superlu_zdefs.h:116
int_t * d_Ucolind_bc_dat
Definition: superlu_zdefs.h:266
int_t ldalsum
Definition: superlu_zdefs.h:221
int * brecv
Definition: superlu_zdefs.h:212
long int * d_Lrowind_bc_offset
Definition: superlu_zdefs.h:263
int64_t Uind_br_cnt
Definition: superlu_zdefs.h:143
int * mod_bit
Definition: superlu_zdefs.h:215
long int Lindval_loc_bc_cnt
Definition: superlu_zdefs.h:122
int_t * d_Lindval_loc_bc_dat
Definition: superlu_zdefs.h:281
doublecomplex * Unzval_br_new_dat
Definition: superlu_zdefs.h:151
long int * d_Linv_bc_offset
Definition: superlu_zdefs.h:279
int64_t Ucolind_br_cnt
Definition: superlu_zdefs.h:148
C_Tree * d_URtree_ptr
Definition: superlu_zdefs.h:303
C_Tree * d_LRtree_ptr
Definition: superlu_zdefs.h:301
Definition: superlu_zdefs.h:371
doublecomplex * d_lsum
Definition: superlu_zdefs.h:387
int_t * A_colind_gsmv
Definition: superlu_zdefs.h:378
int_t * diag_len
Definition: superlu_zdefs.h:374
int_t * xrow_to_proc
Definition: superlu_zdefs.h:382
pzgsmv_comm_t * gsmv_comm
Definition: superlu_zdefs.h:375
pxgstrs_comm_t * gstrs_comm
Definition: superlu_zdefs.h:377
int * d_fmod
Definition: superlu_zdefs.h:389
NRformat_loc3d * A3d
Definition: superlu_zdefs.h:383
int_t * row_to_proc
Definition: superlu_zdefs.h:372
doublecomplex * d_x
Definition: superlu_zdefs.h:388
int * d_bmod
Definition: superlu_zdefs.h:390
int_t * inv_perm_c
Definition: superlu_zdefs.h:373
Definition: superlu_zdefs.h:76
DiagScale_t DiagScale
Definition: superlu_zdefs.h:77
double * R
Definition: superlu_zdefs.h:78
int_t * perm_c
Definition: superlu_zdefs.h:81
double * C
Definition: superlu_zdefs.h:79
int_t * perm_r
Definition: superlu_zdefs.h:80
Definition: superlu_zdefs.h:467
doublecomplex * BlockUFactor
Definition: superlu_zdefs.h:469
doublecomplex * BlockLFactor
Definition: superlu_zdefs.h:468
Definition: superlu_zdefs.h:481
doublecomplex * tU
Definition: superlu_zdefs.h:483
int_t * indCols
Definition: superlu_zdefs.h:484
doublecomplex * tX
Definition: superlu_zdefs.h:482
Definition: superlu_zdefs.h:461
doublecomplex * bigU
Definition: superlu_zdefs.h:462
doublecomplex * bigV
Definition: superlu_zdefs.h:463
Definition: superlu_zdefs.h:318
sForest_t ** sForests
Definition: superlu_zdefs.h:326
int_t * iperm_c_supno
Definition: superlu_zdefs.h:321
int_t ** treePerm
Definition: superlu_zdefs.h:325
zLUValSubBuf_t * LUvsb
Definition: superlu_zdefs.h:329
SupernodeToGridMap_t * superGridMap
Definition: superlu_zdefs.h:330
int * gemmCsizes
Definition: superlu_zdefs.h:336
int_t * supernode2treeMap
Definition: superlu_zdefs.h:327
int_t * myZeroTrIdxs
Definition: superlu_zdefs.h:324
int_t * myTreeIdxs
Definition: superlu_zdefs.h:323
int_t * myNodeCount
Definition: superlu_zdefs.h:322
int_t nsupers
Definition: superlu_zdefs.h:319
int mxLeafNode
Definition: superlu_zdefs.h:334
int * supernodeMask
Definition: superlu_zdefs.h:328
int * diagDims
Definition: superlu_zdefs.h:335
int maxLvl
Definition: superlu_zdefs.h:331
gEtreeInfo_t gEtreeInfo
Definition: superlu_zdefs.h:320
Definition: superlu_zdefs.h:474
int_t * ilsumT
Definition: superlu_zdefs.h:477
int_t ldaspaT
Definition: superlu_zdefs.h:476
doublecomplex * xT
Definition: superlu_zdefs.h:475
Definitions which are precision-neutral.
SupernodeToGridMap_t
Definition: superlu_defs.h:1291
trtype_t
Definition: superlu_defs.h:1317
int64_t handle_t
Definition: superlu_defs.h:348
#define NBUFFERS
Definition: superlu_defs.h:205
int64_t int_t
Definition: superlu_defs.h:119
DiagScale_t
Definition: superlu_enum_consts.h:35
fact_t
Definition: superlu_enum_consts.h:30
float pzdistribute3d_Yang(superlu_dist_options_t *options, int_t n, SuperMatrix *A, zScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: pzdistribute3d.c:24
int zcreate_matrix_postfix3d(SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: zcreate_matrix3d.c:71
void z3D_printMemUse(ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: zmemory_dist.c:242
int_t zlocalSolveXkYk(trtype_t trtype, int_t k, doublecomplex *x, int nrhs, zLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: pzgstrs3d.c:6149
int_t pzgstrs_init(int_t, int_t, int_t, int_t, int_t[], int_t[], gridinfo_t *grid, Glu_persist_t *, zSOLVEstruct_t *)
Definition: pzutil.c:649
int_t zUPanelUpdate(int_t k, int *factored_U, MPI_Request *, doublecomplex *BlockLFactor, doublecomplex *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, zLUstruct_t *, SuperLUStat_t *, SCT_t *)
int_t initPackLUInfo(int_t nsupers, packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:168
void zlsum_bmod_inv_gpu_wrap(superlu_dist_options_t *, int, int, int, int, doublecomplex *, doublecomplex *, int, int, int_t, int *, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, int_t *, int64_t *, int_t *, int64_t *, doublecomplex *, int64_t *, doublecomplex *, int64_t *, doublecomplex *, int64_t *, int_t *, int64_t *, int_t *, gridinfo_t *, int_t, uint64_t *, uint64_t *, doublecomplex *, doublecomplex *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int)
void zCreate_CompRowLoc_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, int_t, int_t, doublecomplex *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
void zDumpLblocks3D(int_t nsupers, gridinfo3d_t *grid3d, Glu_persist_t *Glu_persist, zLocalLU_t *Llu)
Dump the factored matrix L using matlab triple-let format.
Definition: zutil_dist.c:1102
int_t zgatherFactoredLU(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, zLUValSubBuf_t *LUvsb, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void zlsum_bmod_inv_master(doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, zLocalLU_t *, SuperLUStat_t **, int_t, int_t, int, int)
Definition: pzgstrs_lsum.c:1899
void zinit3DLUstructForest(int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t **sForests, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void zGenCSRLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *, doublecomplex **, int_t **, int_t **, int_t *, int_t *)
void pzgstrs2(int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, zLocalLU_t *Llu, SuperLUStat_t *stat)
int zpivot_batch(superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, double **ReqPtr, double **CeqPtr, DiagScale_t *, int **RpivPtr)
Compute row pivotings for each matrix, for numerical stability.
Definition: zpivot_batch.c:44
void zFillRHS_dist(char *, int_t, doublecomplex *, int_t, SuperMatrix *, doublecomplex *, int_t)
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's.
Definition: zutil_dist.c:572
void zPrint_CompCol_Matrix_dist(SuperMatrix *)
int updateDirtyBit(int_t k0, HyP_t *HyP, gridinfo_t *grid)
Definition: sec_structs.c:651
int zreduceAllAncestors3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm, zLUValSubBuf_t *LUvsb, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t zlsumReducePrK(int_t k, doublecomplex *x, doublecomplex *lsum, doublecomplex *recvbuf, int nrhs, zLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:4157
void pzgsmv_init(SuperMatrix *, int_t *, gridinfo_t *, pzgsmv_comm_t *)
Definition: pzgsmv.c:26
int zDeAllocLlu_3d(int_t n, zLUstruct_t *, gridinfo3d_t *)
Definition: zutil_dist.c:490
int_t zlasum_bmod_Tree(int_t pTree, int_t cTree, doublecomplex *lsum, doublecomplex *x, zxT_struct *xT_s, int nrhs, zlsumBmod_buff_t *lbmod_buf, zLUstruct_t *LUstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat)
Definition: pzgstrs3d.c:3901
int_t zIRecv_UDiagBlock(int_t k0, doublecomplex *ublk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
doublecomplex * zgetBigV(int_t, int_t)
int sp_ztrsv_dist(char *, char *, char *, SuperMatrix *, SuperMatrix *, doublecomplex *, int *)
Definition: zsp_blas2_dist.c:94
int ztrsm_(const char *, const char *, const char *, const char *, const int *, const int *, const doublecomplex *, const doublecomplex *, const int *, doublecomplex *, const int *)
float zdistribute(superlu_dist_options_t *, int_t, SuperMatrix *, Glu_freeable_t *, zLUstruct_t *, gridinfo_t *)
Definition: zdistribute.c:66
void zlaqgs_dist(SuperMatrix *, double *, double *, double, double, double, char *)
Definition: zlaqgs_dist.c:92
void pzgssvx_ABglobal(superlu_dist_options_t *, SuperMatrix *, zScalePermstruct_t *, doublecomplex *, int, int, gridinfo_t *, zLUstruct_t *, double *, SuperLUStat_t *, int *)
int_t pzReDistribute3d_B_to_X(doublecomplex *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, doublecomplex *x, zScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, zSOLVEstruct_t *SOLVEstruct)
Definition: pzgstrs3d.c:6285
int_t zDiagFactIBCast(int_t k, int_t k0, doublecomplex *BlockUFactor, doublecomplex *BlockLFactor, int *IrecvPlcd_D, MPI_Request *, MPI_Request *, MPI_Request *, MPI_Request *, gridinfo_t *, superlu_dist_options_t *, double thresh, zLUstruct_t *LUstruct, SuperLUStat_t *, int *info, SCT_t *, int tag_ub)
void pzgsmv(int_t, SuperMatrix *, gridinfo_t *, pzgsmv_comm_t *, doublecomplex x[], doublecomplex ax[])
Definition: pzgsmv.c:234
int_t scuStatUpdate(int_t knsupc, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *stat)
Definition: sec_structs.c:668
int_t zleafForestBackSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x, doublecomplex *lsum, doublecomplex *recvbuf, MPI_Request *send_req, int nrhs, zlsumBmod_buff_t *lbmod_buf, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:4288
void Local_Zgstrf2(superlu_dist_options_t *options, int_t k, double thresh, doublecomplex *BlockUFactor, Glu_persist_t *, gridinfo_t *, zLocalLU_t *, SuperLUStat_t *, int *info, SCT_t *)
int_t zlsumForestBsolve(int_t k, int_t treeId, doublecomplex *lsum, doublecomplex *x, zxT_struct *xT_s, int nrhs, zlsumBmod_buff_t *lbmod_buf, zLUstruct_t *LUstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat)
Definition: pzgstrs3d.c:4054
doublecomplex * zready_lsum
Definition: pzgstrs3d.c:149
int_t zLPanelUpdate(int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, doublecomplex *BlockUFactor, gridinfo_t *, zLUstruct_t *, SCT_t *)
void zgstrf2(int_t k, doublecomplex *diagBlk, int_t LDA, doublecomplex *BlockUfactor, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
Definition: pzgstrf2.c:404
int sp_zgemv_dist(char *, doublecomplex, SuperMatrix *, doublecomplex *, int, doublecomplex, doublecomplex *, int)
SpGEMV.
Definition: zsp_blas2_dist.c:398
void zreadtriple_dist(FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
Definition: zreadtriple.c:34
int_t pzgstrs_delete_device_lsum_x(zSOLVEstruct_t *)
Definition: pzutil.c:1067
int zcreate_matrix3d(SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, gridinfo3d_t *grid3d)
Definition: zcreate_matrix3d_Jake.c:67
void zreadMM_dist(FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
Definition: zreadMM.c:37
int_t zIBcast_UPanel(int_t k, int_t k0, int_t *usub, doublecomplex *uval, gridinfo_t *, int *msgcnt, MPI_Request *, int *ToSendD, int)
struct zxT_struct zxT_struct
int_t znonLeafForestForwardSolve3d(int_t treeId, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x, doublecomplex *lsum, zxT_struct *xT_s, doublecomplex *recvbuf, doublecomplex *rtemp, MPI_Request *send_req, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1899
void pzgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, zLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *)
zdiagFactBufs_t ** zinitDiagFactBufsArr(int mxLeafNode, int ldt, gridinfo_t *grid)
struct zlsumBmod_buff_t zlsumBmod_buff_t
double pzlangs(char *, SuperMatrix *, gridinfo_t *)
Definition: pzlangs.c:64
void pzgstrs3d_newsolve(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info)
Definition: pzgstrs3d.c:6954
int getNsupers(int, Glu_persist_t *)
Definition: trfAux.c:42
void pzinf_norm_error(int, int_t, int_t, doublecomplex[], int_t, doublecomplex[], int_t, MPI_Comm)
Check the inf-norm of the error vector.
Definition: pzutil.c:1274
double zMaxAbsLij(int iam, int n, Glu_persist_t *, zLUstruct_t *, gridinfo_t *)
Find max(abs(L(i,j)))
Definition: zutil_dist.c:639
void pzgssvx(superlu_dist_options_t *, SuperMatrix *, zScalePermstruct_t *, doublecomplex *, int, int, gridinfo_t *, zLUstruct_t *, zSOLVEstruct_t *, double *, SuperLUStat_t *, int *)
void zCreate_CompCol_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, doublecomplex *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
void pzgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *, zLocalLU_t *, Ublock_info_t *, SuperLUStat_t *)
Definition: pzgstrf2.c:852
int_t zAllocLlu_3d(int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int superlu_zgemv(const char *trans, const int m, const int n, const doublecomplex alpha, const doublecomplex *a, const int lda, const doublecomplex *x, const int incx, const doublecomplex beta, doublecomplex *y, const int incy)
void pzCompute_Diag_Inv(int_t, zLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: pzgstrs.c:664
void zDestroy_LU(int_t, gridinfo_t *, zLUstruct_t *)
Destroy distributed L & U matrices.
Definition: pzutil.c:441
void Free_HyP(HyP_t *HyP)
Definition: sec_structs.c:627
int_t zPackLBlock(int_t k, doublecomplex *Dest, Glu_persist_t *, gridinfo_t *, zLocalLU_t *)
void * zuser_malloc_dist(int_t, int_t)
Definition: zmemory_dist.c:29
int_t ztrs_X_gather3d(doublecomplex *x, int nrhs, ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1551
void zlsum_bmod_GG(doublecomplex *lsum, doublecomplex *x, doublecomplex *xk, int nrhs, zlsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, zLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:5703
int zPrint_CompRowLoc_Matrix_dist(SuperMatrix *)
int zcreate_matrix_postfix(SuperMatrix *, int, doublecomplex **, int *, doublecomplex **, int *, FILE *, char *, gridinfo_t *)
Definition: zcreate_matrix.c:75
void zlsum_bmod_GG_newsolve(ztrf3Dpartition_t *trf3Dpartition, doublecomplex *lsum, doublecomplex *x, doublecomplex *xk, int nrhs, zlsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, zLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:5888
int_t zWait_LRecv(MPI_Request *, int *msgcnt, int *msgcntsU, gridinfo_t *, SCT_t *)
int_t zQuerySpace_dist(int_t, zLUstruct_t *, gridinfo_t *, SuperLUStat_t *, superlu_dist_mem_usage_t *)
Definition: zmemory_dist.c:72
void zlsum_fmod(doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, int, int, int_t, int *fmod, int_t, int_t, int_t, int_t *, gridinfo_t *, zLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: pzgstrs_lsum.c:61
int_t pzgstrs_init_device_lsum_x(superlu_dist_options_t *, int_t, int_t, int_t, gridinfo_t *, zLUstruct_t *, zSOLVEstruct_t *, int *)
Definition: pzutil.c:778
void pzgsequ(SuperMatrix *, double *, double *, double *, double *, double *, int *, gridinfo_t *)
Definition: pzgsequ.c:86
int_t zbroadcastAncestor3d(ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t checkRecvUDiag(int_t k, commRequests_t *comReqs, gridinfo_t *grid, SCT_t *SCT)
Definition: treeFactorization.c:205
int_t zzRecvLPanel(int_t k, int_t sender, doublecomplex alpha, doublecomplex beta, doublecomplex *Lval_buf, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
doublecomplex * doublecomplexMalloc_dist(int_t)
Definition: zmemory_dist.c:154
int_t zgatherAllFactoredLUFr(int_t *myZeroTrIdxs, sForest_t *sForests, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t zzSendUPanel(int_t k, int_t receiver, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t zblock_gemm_scatterTopRight(int_t lb, int_t j, doublecomplex *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, zLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int_t zISend_LDiagBlock(int_t k0, doublecomplex *lblk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
void zgather_l(int_t num_LBlk, int_t knsupc, Remain_info_t *L_info, doublecomplex *lval, int_t LD_lval, doublecomplex *L_buff)
int_t zleafForestForwardSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x, doublecomplex *lsum, doublecomplex *recvbuf, doublecomplex *rtemp, MPI_Request *send_req, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1989
int_t ziBcastXk2Pck(int_t k, doublecomplex *x, int nrhs, int **sendList, MPI_Request *send_req, zLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:6198
void pzgstrs3d(superlu_dist_options_t *, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info)
Definition: pzgstrs3d.c:6625
int pzPermute_Dense_Matrix(int_t, int_t, int_t[], int_t[], doublecomplex[], int, doublecomplex[], int, int, gridinfo_t *)
Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-t...
Definition: pzutil.c:296
void zDestroy_Tree(int_t, gridinfo_t *, zLUstruct_t *)
Destroy broadcast and reduction trees used in triangular solve.
Definition: pzutil.c:1313
int zfreeScuBufs(zscuBufs_t *scuBufs)
void pzconvert_flatten_skyline2UROWDATA(superlu_dist_options_t *, gridinfo_t *, zLUstruct_t *, SuperLUStat_t *, int n)
Definition: pzgssvx.c:2335
void zinf_norm_error_dist(int_t, int_t, doublecomplex *, int_t, doublecomplex *, int_t, gridinfo_t *)
Check the inf-norm of the error vector.
Definition: zutil_dist.c:595
int_t pzgsTrBackSolve3d(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x3d, doublecomplex *lsum3d, zxT_struct *xT_s, doublecomplex *recvbuf, MPI_Request *send_req, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:7584
int_t zIBcastRecvLPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Lsub_buf, doublecomplex *Lval_buf, int *factored, gridinfo_t *, zLUstruct_t *, SCT_t *, int tag_ub)
int freePackLUInfo(packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:177
void zZero_CompRowLoc_Matrix_dist(SuperMatrix *)
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,..,n.
Definition: zutil_dist.c:377
int_t zscatter3dLPanels(int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
void PrintDoublecomplex(char *, int_t, doublecomplex *)
Definition: zutil_dist.c:618
void zreadrb_dist(int, FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
Definition: zreadrb.c:283
void zInit_HyP(superlu_dist_options_t *, HyP_t *HyP, zLocalLU_t *Llu, int_t mcb, int_t mrb)
int_t zTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp, int_t *usub, doublecomplex *uval, doublecomplex *tempv, int_t knsupc, int nsupr, doublecomplex *lusup, Glu_persist_t *Glu_persist)
Definition: pzgstrf2.c:806
void zuser_free_dist(int_t, int_t)
Definition: zmemory_dist.c:48
void nv_init_wrapper(MPI_Comm)
int zScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d)
int_t zdenseTreeFactor(int_t nnnodes, int_t *perm_c_supno, commRequests_t *comReqs, zscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, zLUValSubBuf_t *LUvsb, zdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
void zscatter_u(int ib, int jb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *lsub, int_t *usub, doublecomplex *tempv, int_t **Ufstnz_br_ptr, doublecomplex **Unzval_br_ptr, gridinfo_t *grid)
int zaxpy_(const int *n, const doublecomplex *alpha, const doublecomplex *x, const int *incx, doublecomplex *y, const int *incy)
int_t zUDiagBlockRecvWait(int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, gridinfo_t *, zLUstruct_t *, SCT_t *)
float pzdistribute_allgrid(superlu_dist_options_t *options, int_t n, SuperMatrix *A, zScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask)
Definition: pzdistribute.c:2330
void pzgsrfs3d(superlu_dist_options_t *, int_t, SuperMatrix *, double, zLUstruct_t *, zScalePermstruct_t *, gridinfo3d_t *, ztrf3Dpartition_t *, doublecomplex *, int_t, doublecomplex *, int_t, int, zSOLVEstruct_t *, double *, SuperLUStat_t *, int *)
Definition: pzgsrfs.c:365
void zComputeLevelsets(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *, int_t *)
int_t zblock_gemm_scatterTopLeft(int_t lb, int_t j, doublecomplex *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, zLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int ztrs_compute_communication_structure(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, int *supernodeMask, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: pzgstrs3d.c:152
int_t zTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, doublecomplex *uval, doublecomplex *tempv)
Definition: pzgstrf2.c:784
double zMaxAbsUij(int iam, int n, Glu_persist_t *, zLUstruct_t *, gridinfo_t *)
Find max(abs(U(i,j)))
Definition: zutil_dist.c:680
int_t zIBcast_LPanel(int_t k, int_t k0, int_t *lsub, doublecomplex *lusup, gridinfo_t *, int *msgcnt, MPI_Request *, int **ToSendR, int_t *xsup, int)
void zCompCol_to_CompRow_dist(int_t m, int_t n, int_t nnz, doublecomplex *a, int_t *colptr, int_t *rowind, doublecomplex **at, int_t **rowptr, int_t **colind)
int_t zBcast_LPanel(int_t k, int_t k0, int_t *lsub, doublecomplex *lusup, gridinfo_t *, int *msgcnt, int **ToSendR, int_t *xsup, SCT_t *, int)
void zCompRow_to_CompCol_dist(int_t, int_t, int_t, doublecomplex *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
int zgemm_(const char *, const char *, const int *, const int *, const int *, const doublecomplex *, const doublecomplex *, const int *, const doublecomplex *, const int *, const doublecomplex *, doublecomplex *, const int *)
void pzgstrs(superlu_dist_options_t *, int_t, zLUstruct_t *, zScalePermstruct_t *, gridinfo_t *, doublecomplex *, int_t, int_t, int_t, int, zSOLVEstruct_t *, SuperLUStat_t *, int *)
Definition: pzgstrs.c:862
int z_c2cpp_GetHWPM(SuperMatrix *, gridinfo_t *, zScalePermstruct_t *)
Definition: z_c2cpp_GetHWPM.cpp:54
int zldperm_dist(int, int, int_t, int_t[], int_t[], doublecomplex[], int_t *, double[], double[])
Definition: zldperm_dist.c:95
int_t zLPanelTrSolve(int_t k, int *factored_L, doublecomplex *BlockUFactor, gridinfo_t *, zLUstruct_t *)
void pzgssvx3d(superlu_dist_options_t *, SuperMatrix *, zScalePermstruct_t *, doublecomplex B[], int ldb, int nrhs, gridinfo3d_t *, zLUstruct_t *, zSOLVEstruct_t *, double *berr, SuperLUStat_t *, int *info)
Definition: pzgssvx3d.c:517
int_t zBcast_UPanel(int_t k, int_t k0, int_t *usub, doublecomplex *uval, gridinfo_t *, int *msgcnt, int *ToSendD, SCT_t *, int)
int superlu_ztrsv(char *uplo, char *trans, char *diag, int n, doublecomplex *a, int lda, doublecomplex *x, int incx)
int_t zp3dScatter(int_t n, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
int_t zcollect3dUpanels(int_t layer, int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void zbcastPermutedSparseA(SuperMatrix *A, zScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: z3DPartition.c:186
#define MAX_LOOKAHEADS
Definition: superlu_zdefs.h:96
int_t zinitLsumBmod_buff(int_t ns, int nrhs, zlsumBmod_buff_t *lbmod_buf)
Definition: pzgstrs3d.c:3934
void zGenCOOLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *, int_t **, int_t **, doublecomplex **, int_t *, int_t *)
int_t zcollect3dLpanels(int_t layer, int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int superlu_zgemm(const char *transa, const char *transb, int m, int n, int k, doublecomplex alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb, doublecomplex beta, doublecomplex *c, int ldc)
void zCreate_Dense_Matrix_dist(SuperMatrix *, int_t, int_t, doublecomplex *, int_t, Stype_t, Dtype_t, Mtype_t)
void zRgather_U(int_t k, int_t jj0, int_t *usub, doublecomplex *uval, doublecomplex *bigU, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno, int_t *perm_u)
int_t zsparseTreeFactor_ASYNC(sForest_t *sforest, commRequests_t **comReqss, zscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, zLUValSubBuf_t **LUvsbs, zdiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, HyP_t *HyP, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
void zlsum_bmod_inv(doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, zLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int)
Definition: pzgstrs_lsum.c:1404
int_t pzgsTrForwardSolve3d(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x3d, doublecomplex *lsum3d, zxT_struct *xT_s, doublecomplex *recvbuf, MPI_Request *send_req, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:7330
doublecomplex * doublecomplexCalloc_dist(int_t)
Definition: zmemory_dist.c:161
void zGenXtrue_dist(int_t, int_t, doublecomplex *, int_t)
Definition: zutil_dist.c:526
void zfill_dist(doublecomplex *, int_t, doublecomplex)
Fills a doublecomplex precision array with a given value.
Definition: zutil_dist.c:585
int pzgsmv_AXglobal_abs(int_t, int_t[], doublecomplex[], int_t[], doublecomplex[], double[])
Definition: pzgsmv_AXglobal.c:288
int zstatic_schedule(superlu_dist_options_t *, int, int, zLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int_t *, int_t *, int *)
Definition: zstatic_schedule.c:45
int superlu_zscal(const int n, const doublecomplex alpha, doublecomplex *x, const int incx)
doublecomplex * zgetBigU(superlu_dist_options_t *, int_t, gridinfo_t *, zLUstruct_t *)
ztrf3Dpartition_t * zinitTrf3Dpartition_allgrid(int_t n, superlu_dist_options_t *options, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void zLUstructFree(zLUstruct_t *)
Deallocate LUstruct.
Definition: pzutil.c:421
void zCopy_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
Definition: zutil_dist.c:362
int zcreate_matrix(SuperMatrix *, int, doublecomplex **, int *, doublecomplex **, int *, FILE *, gridinfo_t *)
Definition: zcreate_matrix.c:347
int sp_zgemm_dist(char *, int, doublecomplex, SuperMatrix *, doublecomplex *, int, doublecomplex, doublecomplex *, int)
Definition: zsp_blas3_dist.c:125
int zwriteLUtoDisk(int nsupers, int_t *xsup, zLUstruct_t *LUstruct)
Definition: zutil_dist.c:998
void zDestroy_trf3Dpartition(ztrf3Dpartition_t *trf3Dpartition)
void zallocateA_dist(int_t, int_t, doublecomplex **, int_t **, int_t **)
Definition: zmemory_dist.c:146
void zlsum_fmod_leaf_newsolve(ztrf3Dpartition_t *trf3Dpartition, doublecomplex *lsum, doublecomplex *x, doublecomplex *xk, doublecomplex *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, zLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:3713
int_t zfsolveReduceLsum3d(int_t treeId, int_t sender, int_t receiver, doublecomplex *lsum, doublecomplex *recvbuf, int nrhs, ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1646
int_t zinit3DLUstruct(int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t zp2pSolvedX3d(int_t treeId, int_t sender, int_t receiver, doublecomplex *x, int nrhs, ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1596
int zSolveInit(superlu_dist_options_t *, SuperMatrix *, int_t[], int_t[], int_t, zLUstruct_t *, gridinfo_t *, zSOLVEstruct_t *)
Initialize the data structure for the solution phase.
Definition: pzutil.c:1108
void pzconvertU(superlu_dist_options_t *, gridinfo_t *, zLUstruct_t *, SuperLUStat_t *, int)
int_t zblock_gemm_scatterBottomRight(int_t lb, int_t j, doublecomplex *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, zLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
void zLUstructInit(const int_t, zLUstruct_t *)
Allocate storage in LUstruct.
Definition: pzutil.c:407
void zGatherNRformat_loc3d_allgrid(fact_t Fact, NRformat_loc *A, doublecomplex *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
int_t zsparseTreeFactor(int_t nnodes, int_t *perm_c_supno, treeTopoInfo_t *treeTopoInfo, commRequests_t *comReqs, zscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, zLUValSubBuf_t *LUvsb, zdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info)
void pzlaqgs(SuperMatrix *, double *, double *, double, double, double, char *)
Definition: pzlaqgs.c:84
void pzgsrfs(superlu_dist_options_t *, int_t, SuperMatrix *, double, zLUstruct_t *, zScalePermstruct_t *, gridinfo_t *, doublecomplex[], int_t, doublecomplex[], int_t, int, zSOLVEstruct_t *, double *, SuperLUStat_t *, int *)
int pzCompRow_loc_to_CompCol_global(int_t, SuperMatrix *, gridinfo_t *, SuperMatrix *)
Gather A from the distributed compressed row format to global A in compressed column format.
Definition: pzutil.c:34
int_t zinitScuBufs(superlu_dist_options_t *, int_t ldt, int_t num_threads, int_t nsupers, zscuBufs_t *, zLUstruct_t *, gridinfo_t *)
int_t pzReDistribute3d_X_to_B(int_t n, doublecomplex *B, int_t m_loc, int_t ldb, int_t fst_row, int nrhs, doublecomplex *x, int_t *ilsum, zScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, zSOLVEstruct_t *SOLVEstruct)
Definition: pzgstrs3d.c:6425
void zlsum_fmod_leaf(int_t treeId, ztrf3Dpartition_t *trf3Dpartition, doublecomplex *lsum, doublecomplex *x, doublecomplex *xk, doublecomplex *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, zLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:2179
int zAllocGlu_3d(int_t n, int_t nsupers, zLUstruct_t *)
Definition: zutil_dist.c:472
void zZeroUblocks(int iam, int n, gridinfo_t *, zLUstruct_t *)
Sets all entries of matrix U to zero.
Definition: zutil_dist.c:953
void pzgstrs_Bglobal(superlu_dist_options_t *, int_t, zLUstruct_t *, gridinfo_t *, doublecomplex *, int_t, int, SuperLUStat_t *, int *)
Definition: pzgstrs_Bglobal.c:107
int_t zWait_URecv(MPI_Request *, int *msgcnt, SCT_t *)
void validateInput_pzgssvx3d(superlu_dist_options_t *, SuperMatrix *A, int ldb, int nrhs, gridinfo3d_t *, int *info)
Validates the input parameters for a given problem.
Definition: zssvx3dAux.c:23
void pzgsrfs_ABXglobal(superlu_dist_options_t *, int_t, SuperMatrix *, double, zLUstruct_t *, gridinfo_t *, doublecomplex *, int_t, doublecomplex *, int_t, int, double *, SuperLUStat_t *, int *)
Definition: pzgsrfs_ABXglobal.c:124
void ztrtri_(char *, char *, int *, doublecomplex *, int *, int *)
void zScalePermstructFree(zScalePermstruct_t *)
Deallocate ScalePermstruct.
Definition: zutil_dist.c:449
int_t zIrecv_UPanel(int_t k, int_t k0, int_t *Usub_buf, doublecomplex *, zLocalLU_t *, gridinfo_t *, MPI_Request *, int)
void zGatherNRformat_loc3d(fact_t Fact, NRformat_loc *A, doublecomplex *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
void pxgstrs_finalize(pxgstrs_comm_t *)
Definition: util.c:318
int_t zbCastXk2Pck(int_t k, zxT_struct *xT_s, int nrhs, zLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:4127
void zgeru_(const int *, const int *, const doublecomplex *, const doublecomplex *, const int *, const doublecomplex *, const int *, doublecomplex *, const int *)
void zPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *)
Print the blocks in the factored matrix L.
Definition: zutil_dist.c:728
int_t zTrs2_GatherU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, doublecomplex *uval, doublecomplex *tempv)
Definition: pzgstrf2.c:759
int_t zWaitU(int_t k, int *msgcnt, MPI_Request *, MPI_Request *, gridinfo_t *, zLUstruct_t *, SCT_t *)
double * doubleMalloc_dist(int_t)
Definition: dmemory_dist.c:155
int zscal_(const int *n, const doublecomplex *alpha, doublecomplex *dx, const int *incx)
int_t zblock_gemm_scatterBottomLeft(int_t lb, int_t j, doublecomplex *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, zLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int zcheckArr(doublecomplex *A, doublecomplex *B, int n)
Definition: zutil_dist.c:1040
void zscatter_l(int ib, int ljb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *usub, int_t *lsub, doublecomplex *tempv, int *indirect_thread, int *indirect2, int_t **Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr, gridinfo_t *grid)
void pzGetDiagU(int_t, zLUstruct_t *, gridinfo_t *, doublecomplex *)
Definition: pzGetDiagU.c:65
double zcomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *)
int superlu_zaxpy(const int n, const doublecomplex alpha, const doublecomplex *x, const int incx, doublecomplex *y, const int incy)
int zcreate_matrix_rb(SuperMatrix *, int, doublecomplex **, int *, doublecomplex **, int *, FILE *, gridinfo_t *)
int pzflatten_LDATA(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: pzgssvx.c:2525
int zcheckLUFromDisk(int nsupers, int_t *xsup, zLUstruct_t *LUstruct)
Definition: zutil_dist.c:1053
void zCopy_CompCol_Matrix_dist(SuperMatrix *, SuperMatrix *)
void zClone_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
int_t pzgstrf(superlu_dist_options_t *, int, int, double anorm, zLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: pzgstrf.c:242
void zCreate_SuperNode_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, doublecomplex *, int_t *, int_t *, int_t *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
int_t zgatherAllFactoredLU(ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t zRecv_UDiagBlock(int_t k0, doublecomplex *ublk_ptr, int_t size, int_t src, gridinfo_t *, SCT_t *, int)
int_t zIrecv_LPanel(int_t k, int_t k0, int_t *Lsub_buf, doublecomplex *Lval_buf, gridinfo_t *, MPI_Request *, zLocalLU_t *, int)
int file_zPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A)
void pzconvertUROWDATA2skyline(superlu_dist_options_t *, gridinfo_t *, zLUstruct_t *, SuperLUStat_t *, int n)
Definition: pzgssvx.c:2309
void zPrint_Dense_Matrix_dist(SuperMatrix *)
doublecomplex * zready_x
Definition: pzgstrs3d.c:149
int zcreate_batch_systems(handle_t *SparseMatrix_handles, int batchCount, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: zcreate_matrix3d.c:300
void zlsum_fmod_inv_gpu_wrap(int, int, int, int, doublecomplex *, doublecomplex *, int, int, int_t, int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, doublecomplex *, int64_t *, doublecomplex *, int64_t *, int_t *, int64_t *, int_t *, int *, gridinfo_t *, int_t, uint64_t *, uint64_t *, doublecomplex *, doublecomplex *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int)
void zgsequ_dist(SuperMatrix *, double *, double *, double *, double *, double *, int *)
Definition: zgsequ_dist.c:91
void zblock_gemm_scatter(int_t lb, int_t j, Ublock_info_t *Ublock_info, Remain_info_t *Remain_info, doublecomplex *L_mat, int ldl, doublecomplex *U_mat, int ldu, doublecomplex *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int_t thread_id, int *indirect, int *indirect2, int_t **Lrowind_bc_ptr, doublecomplex **Lnzval_bc_ptr, int_t **Ufstnz_br_ptr, doublecomplex **Unzval_br_ptr, int_t *xsup, gridinfo_t *, SuperLUStat_t *)
void zPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *)
Print the blocks in the factored matrix U.
Definition: zutil_dist.c:912
void zdelete_multiGPU_buffers()
int_t zzSendLPanel(int_t k, int_t receiver, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void zallocScalePermstruct_RC(zScalePermstruct_t *, int_t m, int_t n)
zdiagFactBufs_t ** zinitDiagFactBufsArrMod(int mxLeafNode, int *ldts, gridinfo_t *grid)
int_t zISend_UDiagBlock(int_t k0, doublecomplex *ublk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
void zscaleMatrixDiagonally(fact_t Fact, zScalePermstruct_t *, SuperMatrix *, SuperLUStat_t *, gridinfo_t *, int *rowequ, int *colequ, int *iinfo)
Definition: zssvx3dAux.c:162
double zlangs_dist(char *, SuperMatrix *)
Definition: zlangs_dist.c:71
void zprepare_multiGPU_buffers(int, int, int, int, int, int)
void zScalePermstructInit(const int_t, const int_t, zScalePermstruct_t *)
Allocate storage in ScalePermstruct.
Definition: zutil_dist.c:438
int zcreate_block_diag_3d(SuperMatrix *A, int batchCount, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
void zScaleAddId_CompRowLoc_Matrix_dist(SuperMatrix *, doublecomplex)
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,...
Definition: zutil_dist.c:394
void zperform_row_permutation(superlu_dist_options_t *, fact_t Fact, zScalePermstruct_t *, zLUstruct_t *LUstruct, int_t m, int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, int job, int Equil, int *rowequ, int *colequ, int *iinfo)
Definition: zssvx3dAux.c:464
int_t zUPanelTrSolve(int_t k, doublecomplex *BlockLFactor, doublecomplex *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, zLUstruct_t *, SuperLUStat_t *, SCT_t *)
int_t zLpanelUpdate(int_t off0, int_t nsupc, doublecomplex *ublk_ptr, int_t ld_ujrow, doublecomplex *lusup, int_t nsupr, SCT_t *)
int_t pzReDistribute_B_to_X(doublecomplex *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, doublecomplex *x, zScalePermstruct_t *, Glu_persist_t *, gridinfo_t *, zSOLVEstruct_t *)
Definition: pzgstrs.c:164
void zDestroy_A3d_gathered_on_2d(zSOLVEstruct_t *, gridinfo3d_t *)
Definition: pzutil.c:1244
void zlsum_bmod(doublecomplex *, doublecomplex *, doublecomplex *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, zLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: pzgstrs_lsum.c:250
int_t zIBcastRecvUPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Usub_buf, doublecomplex *Uval_buf, gridinfo_t *, zLUstruct_t *, SCT_t *, int tag_ub)
int zfreeDiagFactBufsArr(int mxLeafNode, zdiagFactBufs_t **dFBufs)
int superlu_zger(const int m, const int n, const doublecomplex alpha, const doublecomplex *x, const int incx, const doublecomplex *y, const int incy, doublecomplex *a, const int lda)
void zreadtriple_noheader(FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
Definition: zreadtriple_noheader.c:34
void zgemv_(const char *, const int *, const int *, const doublecomplex *, const doublecomplex *a, const int *, const doublecomplex *, const int *, const doublecomplex *, doublecomplex *, const int *)
float zdist_psymbtonum(superlu_dist_options_t *, int_t, SuperMatrix *, zScalePermstruct_t *, Pslu_freeable_t *, zLUstruct_t *, gridinfo_t *)
Definition: pzsymbfact_distdata.c:1218
int_t zzRecvUPanel(int_t k, int_t sender, doublecomplex alpha, doublecomplex beta, doublecomplex *Uval_buf, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void zCopy_Dense_Matrix_dist(int_t, int_t, doublecomplex *, int_t, doublecomplex *, int_t)
int_t zLluBufInit(zLUValSubBuf_t *, zLUstruct_t *)
int zequil_batch(superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, double **ReqPtr, double **CeqPtr, DiagScale_t *)
Equilibrate the systems using the LAPACK-style algorithm.
Definition: zequil_batch.c:42
int_t ztrs_B_init3d(int_t nsupers, doublecomplex *x, int nrhs, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: pzgstrs3d.c:31
int pzgsmv_AXglobal_setup(SuperMatrix *, Glu_persist_t *, gridinfo_t *, int_t *, int_t *[], doublecomplex *[], int_t *[], int_t[])
int zDeAllocGlu_3d(zLUstruct_t *)
Definition: zutil_dist.c:482
int_t zbsolve_Xt_bcast(int_t ilvl, zxT_struct *xT_s, int nrhs, ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1719
float pzdistribute(superlu_dist_options_t *, int_t, SuperMatrix *, zScalePermstruct_t *, Glu_freeable_t *, zLUstruct_t *, gridinfo_t *)
Definition: pzdistribute.c:328
void zreadhb_dist(int, FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
Definition: zreadhb.c:105
int_t pzgsTrBackSolve3d_newsolve(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x3d, doublecomplex *lsum3d, doublecomplex *recvbuf, MPI_Request *send_req, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:7712
int zcreate_matrix_dat(SuperMatrix *, int, doublecomplex **, int *, doublecomplex **, int *, FILE *, gridinfo_t *)
int_t zSchurComplementSetupGPU(int_t k, msgs_t *msgs, packLUInfo_t *, int_t *, int_t *, int_t *, gEtreeInfo_t *, factNodelists_t *, zscuBufs_t *, zLUValSubBuf_t *LUvsb, gridinfo_t *, zLUstruct_t *, HyP_t *)
int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *)
Definition: treeFactorization.c:226
int file_PrintDoublecomplex(FILE *fp, char *, int_t, doublecomplex *)
Definition: zutil_dist.c:627
int_t pzgstrf3d(superlu_dist_options_t *, int m, int n, double anorm, ztrf3Dpartition_t *, SCT_t *, zLUstruct_t *, gridinfo3d_t *, SuperLUStat_t *, int *)
Definition: pzgstrf3d.c:120
int superlu_ztrsm(const char *sideRL, const char *uplo, const char *transa, const char *diag, const int m, const int n, const doublecomplex alpha, const doublecomplex *a, const int lda, doublecomplex *b, const int ldb)
ztrf3Dpartition_t * zinitTrf3DpartitionLUstructgrid0(int_t n, superlu_dist_options_t *options, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t zp3dCollect(int_t layer, int_t n, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
float pzdistribute_allgrid_index_only(superlu_dist_options_t *options, int_t n, SuperMatrix *A, zScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, zLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask)
Definition: pzdistribute.c:3422
int zinitDiagFactBufs(int ldt, zdiagFactBufs_t *dFBuf)
void znewTrfPartitionInit(int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: z3DPartition.c:4
void zSolveFinalize(superlu_dist_options_t *, zSOLVEstruct_t *)
Release the resources used for the solution phase.
Definition: pzutil.c:1196
void zGenCSCLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *, doublecomplex **, int_t **, int_t **, int_t *, int_t *)
int ztrsv_(char *, char *, char *, int *, doublecomplex *, int *, doublecomplex *, int *)
int pzgssvx3d_csc_batch(superlu_dist_options_t *, int batchCount, int m, int n, int nnz, int nrhs, handle_t *, doublecomplex **RHSptr, int *ldRHS, double **ReqPtr, double **CeqPtr, int **RpivPtr, int **CpivPtr, DiagScale_t *DiagScale, handle_t *F, doublecomplex **Xptr, int *ldX, double **Berrs, gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info)
Solve a batch of linear systems Ai * Xi = Bi with direct method, computing the LU factorization of ea...
Definition: pzgssvx3d_csc_batch.c:80
double * doubleCalloc_dist(int_t)
Definition: dmemory_dist.c:162
void pzgsmv_finalize(pzgsmv_comm_t *)
Definition: pzgsmv.c:373
int_t znonLeafForestBackSolve3d(int_t treeId, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x, doublecomplex *lsum, zxT_struct *xT_s, doublecomplex *recvbuf, MPI_Request *send_req, int nrhs, zlsumBmod_buff_t *lbmod_buf, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:4199
int zread_binary(FILE *, int_t *, int_t *, int_t *, doublecomplex **, int_t **, int_t **)
Definition: zbinary_io.c:4
void pzgstrf2_trsm(superlu_dist_options_t *options, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, zLocalLU_t *, MPI_Request *, int tag_ub, SuperLUStat_t *, int *info)
Definition: pzgstrf2.c:142
int_t zscatter3dUPanels(int_t nsupers, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
int_t pzgsTrForwardSolve3d_newsolve(superlu_dist_options_t *options, int_t n, zLUstruct_t *LUstruct, zScalePermstruct_t *ScalePermstruct, ztrf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, doublecomplex *x3d, doublecomplex *lsum3d, doublecomplex *recvbuf, MPI_Request *send_req, int nrhs, zSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:7477
int_t zreduceAncestors3d(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, doublecomplex *Lval_buf, doublecomplex *Uval_buf, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t zzeroSetLU(int_t nnodes, int_t *nodeList, zLUstruct_t *, gridinfo3d_t *)
void zlsum_fmod_inv(doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, int, int_t, int *fmod, int_t *, gridinfo_t *, zLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int)
Definition: pzgstrs_lsum.c:427
zLUValSubBuf_t ** zLluBufInitArr(int_t numLA, zLUstruct_t *LUstruct)
void zZeroLblocks(int, int, gridinfo_t *, zLUstruct_t *)
Sets all entries of matrix L to zero.
Definition: zutil_dist.c:777
void zgather_u(int_t num_u_blks, Ublock_info_t *Ublock_info, int_t *usub, doublecomplex *uval, doublecomplex *bigU, int_t ldu, int_t *xsup, int_t klst)
int pzgsmv_AXglobal(int_t, int_t[], doublecomplex[], int_t[], doublecomplex[], doublecomplex[])
Definition: pzgsmv_AXglobal.c:258
void zRgather_L(int_t k, int_t *lsub, doublecomplex *lusup, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno)
int_t zIRecv_LDiagBlock(int_t k0, doublecomplex *L_blk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
ztrf3Dpartition_t * zinitTrf3Dpartition(int_t nsupers, superlu_dist_options_t *options, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t zSchurComplementSetup(int_t k, int *msgcnt, Ublock_info_t *, Remain_info_t *, uPanelInfo_t *, lPanelInfo_t *, int_t *, int_t *, int_t *, doublecomplex *bigU, int_t *Lsub_buf, doublecomplex *Lval_buf, int_t *Usub_buf, doublecomplex *Uval_buf, gridinfo_t *, zLUstruct_t *)
int_t zreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, doublecomplex *x, int nrhs, ztrf3Dpartition_t *trf3Dpartition, zLUstruct_t *LUstruct, gridinfo3d_t *grid3d, doublecomplex *recvbuf, xtrsTimer_t *xtrsTimer)
Definition: pzgstrs3d.c:1492
void zScaleAdd_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *, doublecomplex)
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,...
Definition: zutil_dist.c:420
int_t zReDistribute_A(SuperMatrix *A, zScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno, gridinfo_t *grid, int_t *colptr[], int_t *rowind[], doublecomplex *a[])
Definition: pzdistribute.c:66
void zlsum_fmod_inv_master(doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, int, int, int_t, int *fmod, int_t, int_t *, gridinfo_t *, zLocalLU_t *, SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int)
Definition: pzgstrs_lsum.c:992
int zLluBufFreeArr(int_t numLA, zLUValSubBuf_t **LUvsbs)
int_t zWaitL(int_t k, int *msgcnt, int *msgcntU, MPI_Request *, MPI_Request *, gridinfo_t *, zLUstruct_t *, SCT_t *)
Mtype_t
Definition: supermatrix.h:42
Dtype_t
Definition: supermatrix.h:35
Stype_t
Definition: supermatrix.h:22
int j
Definition: sutil_dist.c:287