SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
superlu_sdefs.h
Go to the documentation of this file.
1
28#ifndef __SUPERLU_SDEFS /* allow multiple inclusions */
29#define __SUPERLU_SDEFS
30
31/*
32 * File name: superlu_sdefs.h
33 * Purpose: Distributed SuperLU data types and function prototypes
34 * History:
35 */
36
37#include "superlu_defs.h"
38
39/*
40 *-- The structure used to store matrix A of the linear system and
41 * several vectors describing the transformations done to matrix A.
42 *
43 * A (SuperMatrix*)
44 * Matrix A in A*X=B, of dimension (A->nrow, A->ncol).
45 * The number of linear equations is A->nrow. The type of A can be:
46 * Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
47 *
48 * DiagScale (DiagScale_t)
49 * Specifies the form of equilibration that was done.
50 * = NOEQUIL: No equilibration.
51 * = ROW: Row equilibration, i.e., A was premultiplied by diag(R).
52 * = COL: Column equilibration, i.e., A was postmultiplied by diag(C).
53 * = BOTH: Both row and column equilibration, i.e., A was replaced
54 * by diag(R)*A*diag(C).
55 *
56 * R float*, dimension (A->nrow)
57 * The row scale factors for A.
58 * If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R).
59 * If DiagScale = NOEQUIL or COL, R is not defined.
60 *
61 * C float*, dimension (A->ncol)
62 * The column scale factors for A.
63 * If DiagScale = COL or BOTH, A is multiplied on the right by diag(C).
64 * If DiagScale = NOEQUIL or ROW, C is not defined.
65 *
66 * perm_r (int*) dimension (A->nrow)
67 * Row permutation vector which defines the permutation matrix Pr,
68 * perm_r[i] = j means row i of A is in position j in Pr*A.
69 *
70 * perm_c (int*) dimension (A->ncol)
71 * Column permutation vector, which defines the
72 * permutation matrix Pc; perm_c[i] = j means column i of A is
73 * in position j in A*Pc.
74 *
75 */
76typedef struct {
77 DiagScale_t DiagScale;
78 float *R;
79 float *C;
80 int_t *perm_r;
81 int_t *perm_c;
83
84#if 0 // Sherry: move to superlu_defs.h
85/*-- Auxiliary data type used in PxGSTRS/PxGSTRS1. */
86typedef struct {
87 int_t lbnum; /* Row block number (local). */
88 int_t indpos; /* Starting position in Uindex[]. */
90#endif
91
92/*
93 * On each processor, the blocks in L are stored in compressed block
94 * column format, the blocks in U are stored in compressed block row format.
95 */
96#define MAX_LOOKAHEADS 50
97typedef struct {
98 int_t **Lrowind_bc_ptr; /* size ceil(NSUPERS/Pc);
99 free'd in distribution routinies */
100 int_t *Lrowind_bc_dat; /* size sum of sizes of Lrowind_bc_ptr[lk]) */
101 long int *Lrowind_bc_offset; /* size ceil(NSUPERS/Pc) */
102 long int Lrowind_bc_cnt;
103
104 float **Lnzval_bc_ptr; /* size ceil(NSUPERS/Pc);
105 free'd in distribution routinies */
106 float *Lnzval_bc_dat; /* size sum of sizes of Lnzval_bc_ptr[lk]) */
107 long int *Lnzval_bc_offset; /* size ceil(NSUPERS/Pc) */
108 long int Lnzval_bc_cnt;
109
110 float **Linv_bc_ptr; /* size ceil(NSUPERS/Pc);
111 free'd in distribution routinies */
112 float *Linv_bc_dat; /* size sum of sizes of Linv_bc_ptr[lk]) */
113 long int *Linv_bc_offset; /* size ceil(NSUPERS/Pc) */
114 long int Linv_bc_cnt;
115
116 int_t **Lindval_loc_bc_ptr; /* size ceil(NSUPERS/Pc);
117 pointers to locations in Lrowind_bc_ptr and Lnzval_bc_ptr;
118 free'd in distribution routinies */
119
120 int_t *Lindval_loc_bc_dat; /* size: sum of sizes of Lindval_loc_bc_ptr[lk]) */
121 long int *Lindval_loc_bc_offset; /* size ceil(NSUPERS/Pc) */
122 long int Lindval_loc_bc_cnt;
123
124 /* for new U format -> */
125 int_t **Ucolind_bc_ptr; /* size ceil(NSUPERS/Pc) */
126 int_t *Ucolind_bc_dat; /* size: sum of sizes of Ucolind_bc_ptr[lk]) */
127 int64_t *Ucolind_bc_offset; /* size ceil(NSUPERS/Pc) */
129
130 float **Unzval_bc_ptr; /* size ceil(NSUPERS/Pc) */
131 float *Unzval_bc_dat; /* size: sum of sizes of Unzval_bc_ptr[lk]) */
132 int64_t *Unzval_bc_offset; /* size ceil(NSUPERS/Pc) */
134
135 int_t **Uindval_loc_bc_ptr; /* size ceil(NSUPERS/Pc) pointers to locations in Ucolind_bc_ptr and Unzval_bc_ptr */
136 int_t *Uindval_loc_bc_dat; /* size: sum of sizes of Uindval_loc_bc_ptr[lk]) */
137 int64_t *Uindval_loc_bc_offset; /* size ceil(NSUPERS/Pc) */
139 /* end for new U format <- */
140
141 int_t *Unnz; /* number of nonzeros per block column in U*/
142 int_t **Lrowind_bc_2_lsum; /* size ceil(NSUPERS/Pc) map indices of Lrowind_bc_ptr to indices of lsum */
143 float **Uinv_bc_ptr; /* size ceil(NSUPERS/Pc) */
144 float *Uinv_bc_dat; /* size sum of sizes of Linv_bc_ptr[lk]) */
145 long int *Uinv_bc_offset; /* size ceil(NSUPERS/Pc) */
146 long int Uinv_bc_cnt;
147
148 int_t **Ufstnz_br_ptr; /* size ceil(NSUPERS/Pr) */
149 int_t *Ufstnz_br_dat; /* size sum of sizes of Ufstnz_br_ptr[lk]) */
150 long int *Ufstnz_br_offset; /* size ceil(NSUPERS/Pr) */
151 long int Ufstnz_br_cnt;
152
153 float **Unzval_br_ptr; /* size ceil(NSUPERS/Pr) */
154 float *Unzval_br_dat; /* size sum of sizes of Unzval_br_ptr[lk]) */
155 long int *Unzval_br_offset; /* size ceil(NSUPERS/Pr) */
156 long int Unzval_br_cnt;
157
158 /*-- Data structures used for broadcast and reduction trees. --*/
159 C_Tree *LBtree_ptr; /* size ceil(NSUPERS/Pc) */
160 C_Tree *LRtree_ptr; /* size ceil(NSUPERS/Pr) */
161 C_Tree *UBtree_ptr; /* size ceil(NSUPERS/Pc) */
162 C_Tree *URtree_ptr; /* size ceil(NSUPERS/Pr) */
163#if 0
164 int_t *Lsub_buf; /* Buffer for the remote subscripts of L */
165 float *Lval_buf; /* Buffer for the remote nonzeros of L */
166 int_t *Usub_buf; /* Buffer for the remote subscripts of U */
167 float *Uval_buf; /* Buffer for the remote nonzeros of U */
168#endif
169 int_t *Lsub_buf_2[MAX_LOOKAHEADS]; /* Buffers for the remote subscripts of L*/
170 float *Lval_buf_2[MAX_LOOKAHEADS]; /* Buffers for the remote nonzeros of L */
171 int_t *Usub_buf_2[MAX_LOOKAHEADS]; /* Buffer for the remote subscripts of U */
172 float *Uval_buf_2[MAX_LOOKAHEADS]; /* Buffer for the remote nonzeros of U */
173 float *ujrow; /* used in panel factorization. */
174 int_t bufmax[NBUFFERS]; /* Maximum buffer size across all MPI ranks:
175 * 0 : maximum size of Lsub_buf[]
176 * 1 : maximum size of Lval_buf[]
177 * 2 : maximum size of Usub_buf[]
178 * 3 : maximum size of Uval_buf[]
179 * 4 : maximum size of tempv[LDA]
180 */
181
182 /*-- Record communication schedule for factorization. --*/
183 int *ToRecv; /* Recv from no one (0), left (1), and up (2).*/
184 int *ToSendD; /* Whether need to send down block row. */
185 int **ToSendR; /* List of processes to send right block col. */
186
187 /*-- Record communication schedule for forward/back solves. --*/
188 /* 1/15/22 Sherry: changed int_t to int type */
189 int *fmod; /* Modification count for L-solve */
190 int **fsendx_plist; /* Column process list to send down Xk */
191 int *frecv; /* Modifications to be recv'd in proc row */
192 int nfrecvx; /* Number of Xk I will receive in L-solve */
193 int nfsendx; /* Number of Xk I will send in L-solve */
194 int *bmod; /* Modification count for U-solve */
195 int **bsendx_plist; /* Column process list to send down Xk */
196 int *brecv; /* Modifications to be recv'd in proc row */
197 int nbrecvx; /* Number of Xk I will receive in U-solve */
198 int nbsendx; /* Number of Xk I will send in U-solve */
199 int *mod_bit; /* Flag contribution from each row blocks */
200
201 /*-- Auxiliary arrays used for forward/back solves. --*/
202 int_t *ilsum; /* Starting position of each supernode in lsum
203 (local) */
204 int_t ldalsum; /* LDA of lsum (local) */
205 int_t SolveMsgSent; /* Number of actual messages sent in LU-solve */
206 int_t SolveMsgVol; /* Volume of messages sent in the solve phase */
207
208
209 /*********************/
210 /* The following variables are used in the hybrid solver */
211
212 /*-- Counts to be used in U^{-T} triangular solve. -- */
213 int_t UT_SOLVE;
214 int_t L_SOLVE;
215 int_t FRECV;
216 int_t ut_ldalsum; /* LDA of lsum (local) */
217 int_t *ut_ilsum; /* ilsum in column-wise */
218 int_t *utmod; /* Modification count for Ut-solve. */
219 int_t **ut_sendx_plist; /* Row process list to send down Xk */
220 int_t *utrecv; /* Modifications to be recev'd in proc column. */
221 int_t n_utsendx; /* Number of Xk I will receive */
222 int_t n_utrecvx; /* Number of Xk I will send */
223 int_t n_utrecvmod;
224 int_t nroot;
225 int_t *ut_modbit;
226 int_t *Urbs;
227 Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
228 Ucb_indptr_t *Ucb_inddat;
229 long int *Ucb_indoffset;
230 long int Ucb_indcnt;
231
232 int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */
233 int_t *Ucb_valdat;
234 long int *Ucb_valoffset;
235 long int Ucb_valcnt;
236
237 /* some additional counters for L solve */
238 int_t n;
239 int_t nleaf;
240 int_t nfrecvmod;
241 int_t inv; /* whether the diagonal block is inverted*/
242
243#ifdef GPU_ACC
244 /* The following variables are used in GPU trisolve */
245
246 int_t *d_Lrowind_bc_dat;
247 long int *d_Lrowind_bc_offset;
248 float *d_Lnzval_bc_dat;
249 long int *d_Lnzval_bc_offset;
250 int_t *d_Ucolind_bc_dat;
251 int64_t *d_Ucolind_bc_offset;
252 float *d_Unzval_bc_dat;
253 long int *d_Unzval_bc_offset;
254
255 float *d_Linv_bc_dat ;
256 float *d_Uinv_bc_dat ;
257 long int *d_Linv_bc_offset ;
258 long int *d_Uinv_bc_offset ;
259 int_t *d_Lindval_loc_bc_dat ;
260 int64_t *d_Lindval_loc_bc_offset ;
261 int_t *d_Uindval_loc_bc_dat ;
262 int64_t *d_Uindval_loc_bc_offset ;
263
264 // long int *d_Lindval_loc_bc_offset ;
265 // int_t *d_Urbs;
266 // int_t *d_Ufstnz_br_dat;
267 // long int *d_Ufstnz_br_offset;
268 // float *d_Unzval_br_dat;
269 // long int *d_Unzval_br_offset;
270 // int_t *d_Ucb_valdat;
271 // long int *d_Ucb_valoffset;
272 // Ucb_indptr_t *d_Ucb_inddat;
273 // long int *d_Ucb_indoffset;
274
275 int_t *d_ilsum ;
276 int_t *d_xsup ;
277 C_Tree *d_LBtree_ptr ;
278 C_Tree *d_LRtree_ptr ;
279 C_Tree *d_UBtree_ptr ;
280 C_Tree *d_URtree_ptr ;
281#endif
282
283} sLocalLU_t;
284
285
286typedef struct {
287 int_t *etree;
288 Glu_persist_t *Glu_persist;
289 sLocalLU_t *Llu;
290 char dt;
292
293
294/*-- Data structure for communication during matrix-vector multiplication. */
295typedef struct {
296 int_t *extern_start;
297 int_t *ind_tosend; /* X indeices to be sent to other processes */
298 int_t *ind_torecv; /* X indeices to be received from other processes */
299 int_t *ptr_ind_tosend;/* Printers to ind_tosend[] (Size procs)
300 (also point to val_torecv) */
301 int_t *ptr_ind_torecv;/* Printers to ind_torecv[] (Size procs)
302 (also point to val_tosend) */
303 int *SendCounts; /* Numbers of X indices to be sent
304 (also numbers of X values to be received) */
305 int *RecvCounts; /* Numbers of X indices to be received
306 (also numbers of X values to be sent) */
307 void *val_tosend; /* X values to be sent to other processes */
308 void *val_torecv; /* X values to be received from other processes */
309 int_t TotalIndSend; /* Total number of indices to be sent
310 (also total number of values to be received) */
311 int_t TotalValSend; /* Total number of values to be sent.
312 (also total number of indices to be received) */
314
315/*-- Data structure holding the information for the solution phase --*/
316typedef struct {
317 int_t *row_to_proc;
318 int_t *inv_perm_c;
319 int_t num_diag_procs, *diag_procs, *diag_len;
320 psgsmv_comm_t *gsmv_comm; /* communication metadata for SpMV,
321 required by IterRefine. */
322 pxgstrs_comm_t *gstrs_comm; /* communication metadata for SpTRSV. */
323 int_t *A_colind_gsmv; /* After psgsmv_init(), the global column
324 indices of A are translated into the relative
325 positions in the gathered x-vector.
326 This is re-used in repeated calls to psgsmv() */
327 int_t *xrow_to_proc; /* used by PDSLin */
328 NRformat_loc3d* A3d; /* Point to 3D {A, B} gathered on 2D layer 0.
329 This needs to be peresistent between
330 3D factorization and solve. */
332
333
334
335/*==== For 3D code ====*/
336
337// new structures for pdgstrf_4_8
338
339#if 0 // Sherry: moved to superlu_defs.h
340typedef struct
341{
342 int_t nub;
343 int_t klst;
344 int_t ldu;
345 int_t* usub;
346 float* uval;
348
349typedef struct
350{
351 int_t *lsub;
352 float *lusup;
353 int_t luptr0;
354 int_t nlb; //number of l blocks
355 int_t nsupr;
357
358/* HyP_t is the data structure to assist HALO offload of Schur-complement. */
359typedef struct
360{
361 Remain_info_t *lookAhead_info, *Remain_info;
362 Ublock_info_t *Ublock_info, *Ublock_info_Phi;
363
364 int_t first_l_block_acc , first_u_block_acc;
365 int_t last_offload ;
366 int_t *Lblock_dirty_bit, * Ublock_dirty_bit;
367 float *lookAhead_L_buff, *Remain_L_buff;
368 int_t lookAheadBlk; /* number of blocks in look-ahead window */
369 int_t RemainBlk ; /* number of blocks outside look-ahead window */
370 int_t num_look_aheads, nsupers;
371 int_t ldu, ldu_Phi;
372 int_t num_u_blks, num_u_blks_Phi;
373
374 int_t jj_cpu;
375 float *bigU_Phi;
376 float *bigU_host;
377 int_t Lnbrow;
378 int_t Rnbrow;
379
380 int_t buffer_size;
381 int_t bigu_size;
382 int offloadCondition;
383 int superlu_acc_offload;
384 int nGPUStreams;
385} HyP_t;
386
387#endif // Above are moved to superlu_defs.h
388
389
390typedef struct
391{
392 int_t * Lsub_buf ;
393 float * Lval_buf ;
394 int_t * Usub_buf ;
395 float * Uval_buf ;
397
399 int_t knsupc,
400 HyP_t* HyP,
401 SCT_t* SCT,
402 SuperLUStat_t *stat
403 );
404
405typedef struct
406{
417
418typedef struct
419{
420 float *bigU;
421 float *bigV;
422} sscuBufs_t;
423
424typedef struct
425{
426 float* BlockLFactor;
427 float* BlockUFactor;
429
430/*=====================*/
431
432/***********************************************************************
433 * Function prototypes
434 ***********************************************************************/
435
436#ifdef __cplusplus
437extern "C" {
438#endif
439
440
441/* Supernodal LU factor related */
442extern void
445extern void
447 int_t, float *, int_t *, int_t *,
449extern void
451 float **, int_t **, int_t **);
452extern int
454 SuperMatrix *);
455extern void
457extern void
460extern void
462 int_t *, int_t *, int_t *, int_t *, int_t *,
464extern void
466 float *, int_t);
467
468extern void sallocateA_dist (int_t, int_t, float **, int_t **, int_t **);
469extern void sGenXtrue_dist (int_t, int_t, float *, int_t);
470extern void sFillRHS_dist (char *, int_t, float *, int_t,
471 SuperMatrix *, float *, int_t);
472extern int screate_matrix(SuperMatrix *, int, float **, int *,
473 float **, int *, FILE *, gridinfo_t *);
474extern int screate_matrix_rb(SuperMatrix *, int, float **, int *,
475 float **, int *, FILE *, gridinfo_t *);
476extern int screate_matrix_dat(SuperMatrix *, int, float **, int *,
477 float **, int *, FILE *, gridinfo_t *);
478extern int screate_matrix_postfix(SuperMatrix *, int, float **, int *,
479 float **, int *, FILE *, char *, gridinfo_t *);
480
481extern void sScalePermstructInit(const int_t, const int_t,
484
485/* Driver related */
486extern void sgsequ_dist (SuperMatrix *, float *, float *, float *,
487 float *, float *, int_t *);
488extern float slangs_dist (char *, SuperMatrix *);
489extern void slaqgs_dist (SuperMatrix *, float *, float *, float,
490 float, float, char *);
491extern void psgsequ (SuperMatrix *, float *, float *, float *,
492 float *, float *, int_t *, gridinfo_t *);
493extern float pslangs (char *, SuperMatrix *, gridinfo_t *);
494extern void pslaqgs (SuperMatrix *, float *, float *, float,
495 float, float, char *);
496extern int psPermute_Dense_Matrix(int_t, int_t, int_t [], int_t[],
497 float [], int, float [], int, int,
498 gridinfo_t *);
499
500extern int sp_strsv_dist (char *, char *, char *, SuperMatrix *,
501 SuperMatrix *, float *, int *);
502extern int sp_sgemv_dist (char *, float, SuperMatrix *, float *,
503 int, float, float *, int);
504extern int sp_sgemm_dist (char *, int, float, SuperMatrix *,
505 float *, int, float, float *, int);
506
511 sScalePermstruct_t *, float *,
512 int, int, gridinfo_t *, sLUstruct_t *, float *,
513 SuperLUStat_t *, int *);
518 sScalePermstruct_t *, float *,
519 int, int, gridinfo_t *, sLUstruct_t *,
520 sSOLVEstruct_t *, float *, SuperLUStat_t *, int *);
527 int_t [], int_t [], gridinfo_t *grid,
529extern void pxgstrs_finalize(pxgstrs_comm_t *);
530extern int sldperm_dist(int, int, int_t, int_t [], int_t [],
531 float [], int_t *, float [], float []);
532extern int sstatic_schedule(superlu_dist_options_t *, int, int,
534 int_t *, int_t *, int *);
535extern void sLUstructInit(const int_t, sLUstruct_t *);
536extern void sLUstructFree(sLUstruct_t *);
537extern void sDestroy_LU(int_t, gridinfo_t *, sLUstruct_t *);
538extern void sDestroy_Tree(int_t, gridinfo_t *, sLUstruct_t *);
539extern void sscatter_l (int ib, int ljb, int nsupc, int_t iukp, int_t* xsup,
540 int klst, int nbrow, int_t lptr, int temp_nbrow,
541 int_t* usub, int_t* lsub, float *tempv,
542 int* indirect_thread, int* indirect2,
543 int_t ** Lrowind_bc_ptr, float **Lnzval_bc_ptr,
544 gridinfo_t * grid);
545extern void sscatter_u (int ib, int jb, int nsupc, int_t iukp, int_t * xsup,
546 int klst, int nbrow, int_t lptr, int temp_nbrow,
547 int_t* lsub, int_t* usub, float* tempv,
548 int_t ** Ufstnz_br_ptr, float **Unzval_br_ptr,
549 gridinfo_t * grid);
550extern int_t psgstrf(superlu_dist_options_t *, int, int, float anorm,
552
553/* #define GPU_PROF
554#define IPM_PROF */
555
556/* Solve related */
559 float *, int_t, int, SuperLUStat_t *, int *);
562 float *, int_t, int_t, int_t, int, sSOLVEstruct_t *,
563 SuperLUStat_t *, int *);
564extern void psgstrf2_trsm(superlu_dist_options_t * options, int_t k0, int_t k,
565 double thresh, Glu_persist_t *, gridinfo_t *,
566 sLocalLU_t *, MPI_Request *, int tag_ub,
567 SuperLUStat_t *, int *info);
568extern void psgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *,
570extern int_t psReDistribute_B_to_X(float *B, int_t m_loc, int nrhs, int_t ldb,
571 int_t fst_row, int_t *ilsum, float *x,
574extern void slsum_fmod(float *, float *, float *, float *,
575 int, int, int_t , int *fmod, int_t, int_t, int_t,
577 MPI_Request [], SuperLUStat_t *);
578extern void slsum_bmod(float *, float *, float *,
579 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
580 int_t **, int_t *, gridinfo_t *, sLocalLU_t *,
581 MPI_Request [], SuperLUStat_t *);
582
583extern void slsum_fmod_inv(float *, float *, float *, float *,
584 int, int_t , int *fmod,
586 SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int);
587extern void slsum_fmod_inv_master(float *, float *, float *, float *,
588 int, int, int_t , int *fmod, int_t,
590 SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int);
591extern void slsum_bmod_inv(float *, float *, float *, float *,
592 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
593 int_t **, int_t *, gridinfo_t *, sLocalLU_t *,
594 SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int);
595extern void slsum_bmod_inv_master(float *, float *, float *, float *,
596 int, int_t, int *bmod, int_t *, Ucb_indptr_t **,
597 int_t **, int_t *, gridinfo_t *, sLocalLU_t *,
598 SuperLUStat_t **, int_t, int_t, int, int);
599
600extern void sComputeLevelsets(int , int_t , gridinfo_t *,
602
603#ifdef GPU_ACC
604extern void psconvertU(superlu_dist_options_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, int);
605extern void slsum_fmod_inv_gpu_wrap(int_t, int_t, int_t, int_t, float *, float *, int, int, int_t , int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, float *, int64_t *, float *, int64_t *, int_t *, int64_t *, int_t *, gridinfo_t *, float * , float * , int_t );
606extern void slsum_bmod_inv_gpu_wrap(superlu_dist_options_t *, int_t, int_t, int_t, int_t, float *, float *, int, int, int_t , int *bmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, float *, int64_t *, float *, int64_t *, int_t *, int64_t *,int_t *,gridinfo_t *);
607#endif
608
610 SuperMatrix *, float, sLUstruct_t *,
612 float [], int_t, float [], int_t, int,
613 sSOLVEstruct_t *, float *, SuperLUStat_t *, int *);
615 SuperMatrix *, float, sLUstruct_t *,
616 gridinfo_t *, float *, int_t, float *, int_t,
617 int, float *, SuperLUStat_t *, int *);
619 gridinfo_t *, int_t *, int_t *[],
620 float *[], int_t *[], int_t []);
621extern int psgsmv_AXglobal(int_t, int_t [], float [], int_t [],
622 float [], float []);
623extern int psgsmv_AXglobal_abs(int_t, int_t [], float [], int_t [],
624 float [], float []);
625extern void psgsmv_init(SuperMatrix *, int_t *, gridinfo_t *,
626 psgsmv_comm_t *);
627extern void psgsmv(int_t, SuperMatrix *, gridinfo_t *, psgsmv_comm_t *,
628 float x[], float ax[]);
629extern void psgsmv_finalize(psgsmv_comm_t *);
630
631/* Memory-related */
632extern float *floatMalloc_dist(int_t);
633extern float *floatCalloc_dist(int_t);
634extern void *duser_malloc_dist (int_t, int_t);
635extern void duser_free_dist (int_t, int_t);
638
639/* Auxiliary routines */
640
646extern void sZeroLblocks(int, int, gridinfo_t *, sLUstruct_t *);
647extern void sZeroUblocks(int iam, int n, gridinfo_t *, sLUstruct_t *);
648extern void sfill_dist (float *, int_t, float);
649extern void sinf_norm_error_dist (int_t, int_t, float*, int_t,
650 float*, int_t, gridinfo_t*);
651extern void psinf_norm_error(int, int_t, int_t, float [], int_t,
652 float [], int_t , MPI_Comm);
653extern void sreadhb_dist (int, FILE *, int_t *, int_t *, int_t *,
654 float **, int_t **, int_t **);
655extern void sreadtriple_dist(FILE *, int_t *, int_t *, int_t *,
656 float **, int_t **, int_t **);
657extern void sreadtriple_noheader(FILE *, int_t *, int_t *, int_t *,
658 float **, int_t **, int_t **);
659extern void sreadrb_dist(int, FILE *, int_t *, int_t *, int_t *,
660 float **, int_t **, int_t **);
661extern void sreadMM_dist(FILE *, int_t *, int_t *, int_t *,
662 float **, int_t **, int_t **);
663extern int sread_binary(FILE *, int_t *, int_t *, int_t *,
664 float **, int_t **, int_t **);
665
666/* Distribute the data for numerical factorization */
670extern void psGetDiagU(int_t, sLUstruct_t *, gridinfo_t *, float *);
671
673
674/* Routines for debugging */
675extern void sPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
676 sLocalLU_t *);
677extern void sPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *,
678 sLocalLU_t *);
683extern void Printfloat5(char *, int_t, float *);
684extern int file_Printfloat5(FILE *, char *, int_t, float *);
685
686extern void sGenCOOLblocks(int, int_t, gridinfo_t*,
687 Glu_persist_t*, sLocalLU_t *, int_t** , int_t** , float ** , int_t* , int_t* );
688extern void sGenCSCLblocks(int, int_t, gridinfo_t*,
689 Glu_persist_t*, sLocalLU_t *, float **, int_t **, int_t **, int_t*, int_t*);
690extern void sGenCSRLblocks(int, int_t, gridinfo_t*,
691 Glu_persist_t*, sLocalLU_t *, float **, int_t **, int_t **, int_t*, int_t*);
692
693
694/* BLAS */
695
696#ifdef USE_VENDOR_BLAS
697extern void sgemm_(const char*, const char*, const int*, const int*, const int*,
698 const float*, const float*, const int*, const float*,
699 const int*, const float*, float*, const int*, int, int);
700extern void strsv_(char*, char*, char*, int*, float*, int*,
701 float*, int*, int, int, int);
702extern void strsm_(const char*, const char*, const char*, const char*,
703 const int*, const int*, const float*, const float*, const int*,
704 float*, const int*, int, int, int, int);
705extern void sgemv_(const char *, const int *, const int *, const float *,
706 const float *a, const int *, const float *, const int *,
707 const float *, float *, const int *, int);
708
709#else
710extern int sgemm_(const char*, const char*, const int*, const int*, const int*,
711 const float*, const float*, const int*, const float*,
712 const int*, const float*, float*, const int*);
713extern int strsv_(char*, char*, char*, int*, float*, int*,
714 float*, int*);
715extern int strsm_(const char*, const char*, const char*, const char*,
716 const int*, const int*, const float*, const float*, const int*,
717 float*, const int*);
718extern void sgemv_(const char *, const int *, const int *, const float *,
719 const float *a, const int *, const float *, const int *,
720 const float *, float *, const int *);
721#endif
722
723extern void sger_(const int*, const int*, const float*,
724 const float*, const int*, const float*, const int*,
725 float*, const int*);
726
727extern int sscal_(const int *n, const float *alpha, float *dx, const int *incx);
728extern int saxpy_(const int *n, const float *alpha, const float *x,
729 const int *incx, float *y, const int *incy);
730
731/* SuperLU BLAS interface: ssuperlu_blas.c */
732extern int superlu_sgemm(const char *transa, const char *transb,
733 int m, int n, int k, float alpha, float *a,
734 int lda, float *b, int ldb, float beta, float *c, int ldc);
735extern int superlu_strsm(const char *sideRL, const char *uplo,
736 const char *transa, const char *diag, const int m, const int n,
737 const float alpha, const float *a,
738 const int lda, float *b, const int ldb);
739extern int superlu_sger(const int m, const int n, const float alpha,
740 const float *x, const int incx, const float *y,
741 const int incy, float *a, const int lda);
742extern int superlu_sscal(const int n, const float alpha, float *x, const int incx);
743extern int superlu_saxpy(const int n, const float alpha,
744 const float *x, const int incx, float *y, const int incy);
745extern int superlu_sgemv(const char *trans, const int m,
746 const int n, const float alpha, const float *a,
747 const int lda, const float *x, const int incx,
748 const float beta, float *y, const int incy);
749extern int superlu_strsv(char *uplo, char *trans, char *diag,
750 int n, float *a, int lda, float *x, int incx);
751
752#ifdef SLU_HAVE_LAPACK
753extern void strtri_(char*, char*, int*, float*, int*, int*);
754#endif
755
756/*==== For 3D code ====*/
757extern int screate_matrix3d(SuperMatrix *A, int nrhs, float **rhs,
758 int *ldb, float **x, int *ldx,
759 FILE *fp, gridinfo3d_t *grid3d);
760extern int screate_matrix_postfix3d(SuperMatrix *A, int nrhs, float **rhs,
761 int *ldb, float **x, int *ldx,
762 FILE *fp, char * postfix, gridinfo3d_t *grid3d);
763
764/* Matrix distributed in NRformat_loc in 3D process grid. It converts
765 it to a NRformat_loc distributed in 2D grid in grid-0 */
766extern void sGatherNRformat_loc3d(fact_t Fact, NRformat_loc *A, float *B,
767 int ldb, int nrhs, gridinfo3d_t *grid3d,
768 NRformat_loc3d **);
769extern int sScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d);
770
772 sScalePermstruct_t *, float B[], int ldb, int nrhs,
774 float *berr, SuperLUStat_t *, int *info);
775extern int_t psgstrf3d(superlu_dist_options_t *, int m, int n, float anorm,
777 gridinfo3d_t *, SuperLUStat_t *, int *);
778extern void sInit_HyP(HyP_t* HyP, sLocalLU_t *Llu, int_t mcb, int_t mrb );
779extern void Free_HyP(HyP_t* HyP);
780extern int updateDirtyBit(int_t k0, HyP_t* HyP, gridinfo_t* grid);
781
782 /* from scatter.h */
783extern void
785 Remain_info_t *Remain_info, float *L_mat, int ldl,
786 float *U_mat, int ldu, float *bigV,
787 // int_t jj0,
788 int_t knsupc, int_t klst,
789 int_t *lsub, int_t *usub, int_t ldt,
790 int_t thread_id,
791 int *indirect, int *indirect2,
792 int_t **Lrowind_bc_ptr, float **Lnzval_bc_ptr,
793 int_t **Ufstnz_br_ptr, float **Unzval_br_ptr,
794 int_t *xsup, gridinfo_t *, SuperLUStat_t *
795#ifdef SCATTER_PROFILE
796 , double *Host_TheadScatterMOP, double *Host_TheadScatterTimer
797#endif
798 );
799
800#ifdef _OPENMP
801/*this version uses a lock to prevent multiple thread updating the same block*/
802extern void
803sblock_gemm_scatter_lock( int_t lb, int_t j, omp_lock_t* lock,
804 Ublock_info_t *Ublock_info, Remain_info_t *Remain_info,
805 float *L_mat, int_t ldl, float *U_mat, int_t ldu,
806 float *bigV,
807 // int_t jj0,
808 int_t knsupc, int_t klst,
809 int_t *lsub, int_t *usub, int_t ldt,
810 int_t thread_id,
811 int *indirect, int *indirect2,
812 int_t **Lrowind_bc_ptr, float **Lnzval_bc_ptr,
813 int_t **Ufstnz_br_ptr, float **Unzval_br_ptr,
814 int_t *xsup, gridinfo_t *
815#ifdef SCATTER_PROFILE
816 , double *Host_TheadScatterMOP, double *Host_TheadScatterTimer
817#endif
818 );
819#endif
820
821extern int_t
823 int_t knsupc, int_t klst, int_t* lsub,
824 int_t * usub, int_t ldt,
825 int* indirect, int* indirect2,
826 HyP_t* HyP, sLUstruct_t *, gridinfo_t*,
827 SCT_t*SCT, SuperLUStat_t *
828 );
829extern int_t
831 int_t knsupc, int_t klst, int_t* lsub,
832 int_t * usub, int_t ldt,
833 int* indirect, int* indirect2,
834 HyP_t* HyP, sLUstruct_t *, gridinfo_t*,
835 SCT_t*SCT, SuperLUStat_t * );
836extern int_t
838 int_t knsupc, int_t klst, int_t* lsub,
839 int_t * usub, int_t ldt,
840 int* indirect, int* indirect2,
841 HyP_t* HyP, sLUstruct_t *, gridinfo_t*,
842 SCT_t*SCT, SuperLUStat_t * );
843extern int_t
845 int_t knsupc, int_t klst, int_t* lsub,
846 int_t * usub, int_t ldt,
847 int* indirect, int* indirect2,
848 HyP_t* HyP, sLUstruct_t *, gridinfo_t*,
849 SCT_t*SCT, SuperLUStat_t * );
850
851 /* from gather.h */
852extern void sgather_u(int_t num_u_blks,
853 Ublock_info_t *Ublock_info, int_t * usub,
854 float *uval, float *bigU, int_t ldu,
855 int_t *xsup, int_t klst /* for SuperSize */
856 );
857
858extern void sgather_l( int_t num_LBlk, int_t knsupc,
859 Remain_info_t *L_info,
860 float * lval, int_t LD_lval,
861 float * L_buff );
862
863extern void sRgather_L(int_t k, int_t *lsub, float *lusup, gEtreeInfo_t*,
865 int_t *myIperm, int_t *iperm_c_supno );
866extern void sRgather_U(int_t k, int_t jj0, int_t *usub, float *uval,
867 float *bigU, gEtreeInfo_t*, Glu_persist_t *,
868 gridinfo_t *, HyP_t *, int_t *myIperm,
869 int_t *iperm_c_supno, int_t *perm_u);
870
871 /* from xtrf3Dpartition.h */
873 superlu_dist_options_t *options,
874 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
875extern void sDestroy_trf3Dpartition(strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d);
876
877extern void s3D_printMemUse(strf3Dpartition_t* trf3Dpartition,
878 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d);
879
880//extern int* getLastDep(gridinfo_t *grid, SuperLUStat_t *stat,
881// superlu_dist_options_t *options, sLocalLU_t *Llu,
882// int_t* xsup, int_t num_look_aheads, int_t nsupers,
883// int_t * iperm_c_supno);
884
885extern void sinit3DLUstructForest( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
886 sForest_t** sForests, sLUstruct_t* LUstruct,
887 gridinfo3d_t* grid3d);
888
889extern int_t sgatherAllFactoredLUFr(int_t* myZeroTrIdxs, sForest_t* sForests,
890 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d,
891 SCT_t* SCT );
892
893 /* The following are from pdgstrf2.h */
894extern int_t sLpanelUpdate(int_t off0, int_t nsupc, float* ublk_ptr,
895 int_t ld_ujrow, float* lusup, int_t nsupr, SCT_t*);
897 double thresh, float *BlockUFactor, Glu_persist_t *,
899 SuperLUStat_t *, int *info, SCT_t*);
900extern int_t sTrs2_GatherU(int_t iukp, int_t rukp, int_t klst,
901 int_t nsupc, int_t ldu, int_t *usub,
902 float* uval, float *tempv);
903extern int_t sTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst,
904 int_t nsupc, int_t ldu, int_t *usub,
905 float* uval, float *tempv);
906extern int_t sTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp,
907 int_t *usub, float* uval, float *tempv,
908 int_t knsupc, int nsupr, float* lusup,
909 Glu_persist_t *Glu_persist) ;
910extern void psgstrs2
911#ifdef _CRAY
912(
913 int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid,
914 sLocalLU_t *Llu, SuperLUStat_t *stat, _fcd ftcs1, _fcd ftcs2, _fcd ftcs3
915);
916#else
917(
918 int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid,
919 sLocalLU_t *Llu, SuperLUStat_t *stat
920);
921#endif
922
923extern void psgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0,
924 int_t k, double thresh, Glu_persist_t *, gridinfo_t *,
925 sLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *);
926
927 /* from p3dcomm.h */
928extern int_t sAllocLlu_3d(int_t nsupers, sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
929extern int_t sp3dScatter(int_t n, sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
931 sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
933 sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
934extern int_t scollect3dLpanels(int_t layer, int_t nsupers, sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
935extern int_t scollect3dUpanels(int_t layer, int_t nsupers, sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
936extern int_t sp3dCollect(int_t layer, int_t n, sLUstruct_t * LUstruct, gridinfo3d_t* grid3d);
937/*zero out LU non zero entries*/
938extern int_t szeroSetLU(int_t nnodes, int_t* nodeList , sLUstruct_t *, gridinfo3d_t*);
939extern int sAllocGlu_3d(int_t n, int_t nsupers, sLUstruct_t *);
941extern int sDeAllocGlu_3d(sLUstruct_t *);
942
943/* Reduces L and U panels of nodes in the List nodeList (size=nnnodes)
944receiver[L(nodelist)] =sender[L(nodelist)] +receiver[L(nodelist)]
945receiver[U(nodelist)] =sender[U(nodelist)] +receiver[U(nodelist)]
946*/
948 int_t nnodes, int_t* nodeList,
949 float* Lval_buf, float* Uval_buf,
950 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
951/*reduces all nodelists required in a level*/
952extern int sreduceAllAncestors3d(int_t ilvl, int_t* myNodeCount,
953 int_t** treePerm,
954 sLUValSubBuf_t* LUvsb,
955 sLUstruct_t* LUstruct,
956 gridinfo3d_t* grid3d,
957 SCT_t* SCT );
958/*
959 Copies factored L and U panels from sender grid to receiver grid
960 receiver[L(nodelist)] <-- sender[L(nodelist)];
961 receiver[U(nodelist)] <-- sender[U(nodelist)];
962*/
964 int_t nnodes, int_t *nodeList, sLUValSubBuf_t* LUvsb,
965 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d,SCT_t* SCT );
966
967/*Gathers all the L and U factors to grid 0 for solve stage
968 By repeatidly calling above function*/
970 gridinfo3d_t* grid3d, SCT_t* SCT );
971
972/*Distributes data in each layer and initilizes ancestors
973 as zero in required nodes*/
974int_t sinit3DLUstruct( int_t* myTreeIdxs, int_t* myZeroTrIdxs,
975 int_t* nodeCount, int_t** nodeList,
976 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d);
977
979 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
980int_t szRecvLPanel(int_t k, int_t sender, float alpha,
981 float beta, float* Lval_buf,
982 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
984 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
985int_t szRecvUPanel(int_t k, int_t sender, float alpha,
986 float beta, float* Uval_buf,
987 sLUstruct_t* LUstruct, gridinfo3d_t* grid3d, SCT_t* SCT);
988
989 /* from communication_aux.h */
991 gridinfo_t *, int* msgcnt, MPI_Request *,
992 int **ToSendR, int_t *xsup, int );
994 gridinfo_t *, int* msgcnt, int **ToSendR,
995 int_t *xsup , SCT_t*, int);
996extern int_t sIBcast_UPanel(int_t k, int_t k0, int_t* usub, float* uval,
997 gridinfo_t *, int* msgcnt, MPI_Request *,
998 int *ToSendD, int );
999extern int_t sBcast_UPanel(int_t k, int_t k0, int_t* usub, float* uval,
1000 gridinfo_t *, int* msgcnt, int *ToSendD, SCT_t*, int);
1001extern int_t sIrecv_LPanel (int_t k, int_t k0, int_t* Lsub_buf,
1002 float* Lval_buf, gridinfo_t *,
1003 MPI_Request *, sLocalLU_t *, int);
1004extern int_t sIrecv_UPanel(int_t k, int_t k0, int_t* Usub_buf, float*,
1005 sLocalLU_t *, gridinfo_t*, MPI_Request *, int);
1006extern int_t sWait_URecv(MPI_Request *, int* msgcnt, SCT_t *);
1007extern int_t sWait_LRecv(MPI_Request*, int* msgcnt, int* msgcntsU,
1008 gridinfo_t *, SCT_t*);
1009extern int_t sISend_UDiagBlock(int_t k0, float *ublk_ptr, int_t size,
1010 MPI_Request *, gridinfo_t *, int);
1011extern int_t sRecv_UDiagBlock(int_t k0, float *ublk_ptr, int_t size,
1012 int_t src, gridinfo_t *, SCT_t*, int);
1013extern int_t sPackLBlock(int_t k, float* Dest, Glu_persist_t *,
1014 gridinfo_t *, sLocalLU_t *);
1015extern int_t sISend_LDiagBlock(int_t k0, float *lblk_ptr, int_t size,
1016 MPI_Request *, gridinfo_t *, int);
1017extern int_t sIRecv_UDiagBlock(int_t k0, float *ublk_ptr, int_t size,
1018 int_t src, MPI_Request *, gridinfo_t *,
1019 SCT_t*, int);
1020extern int_t sIRecv_LDiagBlock(int_t k0, float *L_blk_ptr, int_t size,
1021 int_t src, MPI_Request *, gridinfo_t*, SCT_t*, int);
1022extern int_t sUDiagBlockRecvWait( int_t k, int_t* IrecvPlcd_D, int_t* factored_L,
1023 MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *);
1024extern int_t LDiagBlockRecvWait( int_t k, int_t* factored_U, MPI_Request *, gridinfo_t *);
1025
1026#if (MPI_VERSION>2)
1027extern int_t sIBcast_UDiagBlock(int_t k, float *ublk_ptr, int_t size,
1028 MPI_Request *, gridinfo_t *);
1029extern int_t sIBcast_LDiagBlock(int_t k, float *lblk_ptr, int_t size,
1030 MPI_Request *, gridinfo_t *);
1031#endif
1032
1033 /* from trfCommWrapper.h */
1035 float *BlockUFactor, float *BlockLFactor,
1036 int_t* IrecvPlcd_D, MPI_Request *, MPI_Request *,
1037 MPI_Request *, MPI_Request *, gridinfo_t *,
1038 superlu_dist_options_t *, double thresh,
1039 sLUstruct_t *LUstruct, SuperLUStat_t *, int *info,
1040 SCT_t *, int tag_ub);
1041extern int_t sUPanelTrSolve( int_t k, float* BlockLFactor, float* bigV,
1044extern int_t sLPanelUpdate(int_t k, int_t* IrecvPlcd_D, int_t* factored_L,
1045 MPI_Request *, float* BlockUFactor, gridinfo_t *,
1046 sLUstruct_t *, SCT_t *);
1047extern int_t sUPanelUpdate(int_t k, int_t* factored_U, MPI_Request *,
1048 float* BlockLFactor, float* bigV,
1051extern int_t sIBcastRecvLPanel(int_t k, int_t k0, int* msgcnt,
1052 MPI_Request *, MPI_Request *,
1053 int_t* Lsub_buf, float* Lval_buf,
1054 int * factored, gridinfo_t *, sLUstruct_t *,
1055 SCT_t *, int tag_ub);
1056extern int_t sIBcastRecvUPanel(int_t k, int_t k0, int* msgcnt, MPI_Request *,
1057 MPI_Request *, int_t* Usub_buf, float* Uval_buf,
1058 gridinfo_t *, sLUstruct_t *, SCT_t *, int tag_ub);
1059extern int_t sWaitL(int_t k, int* msgcnt, int* msgcntU, MPI_Request *,
1060 MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *);
1061extern int_t sWaitU(int_t k, int* msgcnt, MPI_Request *, MPI_Request *,
1062 gridinfo_t *, sLUstruct_t *, SCT_t *);
1063extern int_t sLPanelTrSolve(int_t k, int_t* factored_L, float* BlockUFactor,
1064 gridinfo_t *, sLUstruct_t *);
1065
1066 /* from trfAux.h */
1067extern int getNsupers(int, Glu_persist_t *);
1068extern int_t initPackLUInfo(int_t nsupers, packLUInfo_t* packLUInfo);
1069extern int freePackLUInfo(packLUInfo_t* packLUInfo);
1072 lPanelInfo_t *, int_t*, int_t *, int_t *,
1073 float *bigU, int_t* Lsub_buf,
1074 float* Lval_buf, int_t* Usub_buf,
1075 float* Uval_buf, gridinfo_t *, sLUstruct_t *);
1079 sLUValSubBuf_t* LUvsb, gridinfo_t *,
1080 sLUstruct_t *, HyP_t*);
1081extern float* sgetBigV(int_t, int_t);
1084// permutation from superLU default
1085
1086 /* from treeFactorization.h */
1089 int_t ldt, int_t num_threads, int_t nsupers,
1091extern int sfreeScuBufs(sscuBufs_t* scuBufs);
1092
1093#if 0 // NOT CALLED
1094// the generic tree factoring code
1095extern int_t treeFactor(
1096 int_t nnnodes, // number of nodes in the tree
1097 int_t *perm_c_supno, // list of nodes in the order of factorization
1098 commRequests_t *comReqs, // lists of communication requests
1099 sscuBufs_t *scuBufs, // contains buffers for schur complement update
1100 packLUInfo_t*packLUInfo,
1101 msgs_t*msgs,
1102 sLUValSubBuf_t* LUvsb,
1103 sdiagFactBufs_t *dFBuf,
1104 factStat_t *factStat,
1105 factNodelists_t *fNlists,
1106 superlu_dist_options_t *options,
1107 int_t * gIperm_c_supno,
1108 int_t ldt,
1109 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1110 double thresh, SCT_t *SCT,
1111 int *info
1112);
1113#endif
1114
1116 int_t nnodes, // number of nodes in the tree
1117 int_t *perm_c_supno, // list of nodes in the order of factorization
1118 treeTopoInfo_t* treeTopoInfo,
1119 commRequests_t *comReqs, // lists of communication requests
1120 sscuBufs_t *scuBufs, // contains buffers for schur complement update
1121 packLUInfo_t*packLUInfo,
1122 msgs_t*msgs,
1123 sLUValSubBuf_t* LUvsb,
1124 sdiagFactBufs_t *dFBuf,
1125 factStat_t *factStat,
1126 factNodelists_t *fNlists,
1127 superlu_dist_options_t *options,
1128 int_t * gIperm_c_supno,
1129 int_t ldt,
1130 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1131 double thresh, SCT_t *SCT,
1132 int *info
1133);
1134
1136 int_t nnnodes, // number of nodes in the tree
1137 int_t *perm_c_supno, // list of nodes in the order of factorization
1138 commRequests_t *comReqs, // lists of communication requests
1139 sscuBufs_t *scuBufs, // contains buffers for schur complement update
1140 packLUInfo_t*packLUInfo,
1141 msgs_t*msgs,
1142 sLUValSubBuf_t* LUvsb,
1143 sdiagFactBufs_t *dFBuf,
1144 factStat_t *factStat,
1145 factNodelists_t *fNlists,
1146 superlu_dist_options_t *options,
1147 int_t * gIperm_c_supno,
1148 int_t ldt,
1149 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1150 double thresh, SCT_t *SCT, int tag_ub,
1151 int *info
1152);
1153
1155 sForest_t* sforest,
1156 commRequests_t **comReqss, // lists of communication requests // size maxEtree level
1157 sscuBufs_t *scuBufs, // contains buffers for schur complement update
1158 packLUInfo_t*packLUInfo,
1159 msgs_t**msgss, // size=num Look ahead
1160 sLUValSubBuf_t** LUvsbs, // size=num Look ahead
1161 sdiagFactBufs_t **dFBufs, // size maxEtree level
1162 factStat_t *factStat,
1163 factNodelists_t *fNlists,
1164 gEtreeInfo_t* gEtreeInfo, // global etree info
1165 superlu_dist_options_t *options,
1166 int_t * gIperm_c_supno,
1167 int_t ldt,
1168 HyP_t* HyP,
1169 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1170 double thresh, SCT_t *SCT, int tag_ub,
1171 int *info
1172);
1174extern int sLluBufFreeArr(int_t numLA, sLUValSubBuf_t **LUvsbs);
1176extern int sfreeDiagFactBufsArr(int_t mxLeafNode, sdiagFactBufs_t** dFBufs);
1178extern int_t checkRecvUDiag(int_t k, commRequests_t *comReqs,
1179 gridinfo_t *grid, SCT_t *SCT);
1180extern int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *);
1181
1182#if 0 // NOT CALLED
1183/* from ancFactorization.h (not called) */
1184extern int_t ancestorFactor(
1185 int_t ilvl, // level of factorization
1186 sForest_t* sforest,
1187 commRequests_t **comReqss, // lists of communication requests // size maxEtree level
1188 sscuBufs_t *scuBufs, // contains buffers for schur complement update
1189 packLUInfo_t*packLUInfo,
1190 msgs_t**msgss, // size=num Look ahead
1191 sLUValSubBuf_t** LUvsbs, // size=num Look ahead
1192 sdiagFactBufs_t **dFBufs, // size maxEtree level
1193 factStat_t *factStat,
1194 factNodelists_t *fNlists,
1195 gEtreeInfo_t* gEtreeInfo, // global etree info
1196 superlu_dist_options_t *options,
1197 int_t * gIperm_c_supno,
1198 int_t ldt,
1199 HyP_t* HyP,
1200 sLUstruct_t *LUstruct, gridinfo3d_t * grid3d, SuperLUStat_t *stat,
1201 double thresh, SCT_t *SCT, int tag_ub, int *info
1202);
1203#endif
1204
1205/*== end 3D prototypes ===================*/
1206
1207
1208#ifdef __cplusplus
1209 }
1210#endif
1211
1212#endif /* __SUPERLU_dDEFS */
1213
int j
Definition: dutil_dist.c:248
#define strtri_
Definition: superlu_FCnames.h:160
#define NBUFFERS
Definition: superlu_defs.h:194
int int_t
Definition: superlu_defs.h:114
DiagScale_t
Definition: superlu_enum_consts.h:35
fact_t
Definition: superlu_enum_consts.h:30
int screate_matrix_postfix(SuperMatrix *, int, float **, int *, float **, int *, FILE *, char *, gridinfo_t *)
Definition: screate_matrix.c:76
int_t szRecvLPanel(int_t k, int_t sender, float alpha, float beta, float *Lval_buf, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void Printfloat5(char *, int_t, float *)
Definition: sutil_dist.c:543
void psgssvx(superlu_dist_options_t *, SuperMatrix *, sScalePermstruct_t *, float *, int, int, gridinfo_t *, sLUstruct_t *, sSOLVEstruct_t *, float *, SuperLUStat_t *, int *)
int_t initPackLUInfo(int_t nsupers, packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:367
int screate_matrix(SuperMatrix *, int, float **, int *, float **, int *, FILE *, gridinfo_t *)
Definition: screate_matrix.c:348
int screate_matrix_postfix3d(SuperMatrix *A, int nrhs, float **rhs, int *ldb, float **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: screate_matrix3d.c:72
int_t sTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp, int_t *usub, float *uval, float *tempv, int_t knsupc, int nsupr, float *lusup, Glu_persist_t *Glu_persist)
Definition: psgstrf2.c:716
void sDestroy_A3d_gathered_on_2d(sSOLVEstruct_t *, gridinfo3d_t *)
Definition: psutil.c:798
int updateDirtyBit(int_t k0, HyP_t *HyP, gridinfo_t *grid)
Definition: sec_structs.c:618
int superlu_sgemv(const char *trans, const int m, const int n, const float alpha, const float *a, const int lda, const float *x, const int incx, const float beta, float *y, const int incy)
void sCreate_CompCol_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, float *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
void sDestroy_trf3Dpartition(trf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d)
void psgstrs_Bglobal(int_t, sLUstruct_t *, gridinfo_t *, float *, int_t, int, SuperLUStat_t *, int *)
Definition: psgstrs_Bglobal.c:104
int saxpy_(const int *n, const float *alpha, const float *x, const int *incx, float *y, const int *incy)
int sPrint_CompRowLoc_Matrix_dist(SuperMatrix *)
int_t sWait_LRecv(MPI_Request *, int *msgcnt, int *msgcntsU, gridinfo_t *, SCT_t *)
int superlu_sscal(const int n, const float alpha, float *x, const int incx)
void sGatherNRformat_loc3d(fact_t Fact, NRformat_loc *A, float *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
int psCompRow_loc_to_CompCol_global(int_t, SuperMatrix *, gridinfo_t *, SuperMatrix *)
Gather A from the distributed compressed row format to global A in compressed column format.
Definition: psutil.c:29
void pslaqgs(SuperMatrix *, float *, float *, float, float, float, char *)
Definition: pslaqgs.c:85
int psgsmv_AXglobal_abs(int_t, int_t[], float[], int_t[], float[], float[])
Definition: psgsmv_AXglobal.c:285
void sCreate_SuperNode_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, float *, int_t *, int_t *, int_t *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
int_t szRecvUPanel(int_t k, int_t sender, float alpha, float beta, float *Uval_buf, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void psgssvx3d(superlu_dist_options_t *, SuperMatrix *, sScalePermstruct_t *, float B[], int ldb, int nrhs, gridinfo3d_t *, sLUstruct_t *, sSOLVEstruct_t *, float *berr, SuperLUStat_t *, int *info)
Definition: psgssvx3d.c:501
void psgsrfs_ABXglobal(int_t, SuperMatrix *, float, sLUstruct_t *, gridinfo_t *, float *, int_t, float *, int_t, int, float *, SuperLUStat_t *, int *)
Definition: psgsrfs_ABXglobal.c:125
int_t sAllocLlu_3d(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t scuStatUpdate(int_t knsupc, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *stat)
Definition: sec_structs.c:635
void sCompRow_to_CompCol_dist(int_t, int_t, int_t, float *, int_t *, int_t *, float **, int_t **, int_t **)
void psgstrs2(int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, sLocalLU_t *Llu, SuperLUStat_t *stat)
int_t sscatter3dLPanels(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void psinf_norm_error(int, int_t, int_t, float[], int_t, float[], int_t, MPI_Comm)
Check the inf-norm of the error vector.
Definition: psutil.c:825
int_t sscatter3dUPanels(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t sp3dCollect(int_t layer, int_t n, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void sreadrb_dist(int, FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadrb.c:275
void sPrint_Dense_Matrix_dist(SuperMatrix *)
int_t sDiagFactIBCast(int_t k, int_t k0, float *BlockUFactor, float *BlockLFactor, int_t *IrecvPlcd_D, MPI_Request *, MPI_Request *, MPI_Request *, MPI_Request *, gridinfo_t *, superlu_dist_options_t *, double thresh, sLUstruct_t *LUstruct, SuperLUStat_t *, int *info, SCT_t *, int tag_ub)
int sAllocGlu_3d(int_t n, int_t nsupers, sLUstruct_t *)
Definition: sutil_dist.c:432
void sGenXtrue_dist(int_t, int_t, float *, int_t)
Definition: sutil_dist.c:486
void sClone_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
int getNsupers(int, Glu_persist_t *)
Definition: trfAux.c:42
void slsum_fmod_inv(float *, float *, float *, float *, int, int_t, int *fmod, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:416
float psdistribute(fact_t, int_t, SuperMatrix *, sScalePermstruct_t *, Glu_freeable_t *, sLUstruct_t *, gridinfo_t *)
Definition: psdistribute.c:326
int sDeAllocLlu_3d(int_t n, sLUstruct_t *, gridinfo3d_t *)
Definition: sutil_dist.c:450
void psgstrs(int_t, sLUstruct_t *, sScalePermstruct_t *, gridinfo_t *, float *, int_t, int_t, int_t, int, sSOLVEstruct_t *, SuperLUStat_t *, int *)
Definition: psgstrs.c:840
int sread_binary(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sbinary_io.c:4
void sComputeLevelsets(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, int_t *)
void sInit_HyP(HyP_t *HyP, sLocalLU_t *Llu, int_t mcb, int_t mrb)
void sPrint_CompCol_Matrix_dist(SuperMatrix *)
int_t szeroSetLU(int_t nnodes, int_t *nodeList, sLUstruct_t *, gridinfo3d_t *)
void sPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *)
Print the blocks in the factored matrix L.
Definition: sutil_dist.c:570
int_t sblock_gemm_scatterTopLeft(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int_t sIBcastRecvUPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Usub_buf, float *Uval_buf, gridinfo_t *, sLUstruct_t *, SCT_t *, int tag_ub)
int file_sPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A)
void Free_HyP(HyP_t *HyP)
Definition: sec_structs.c:594
void psGetDiagU(int_t, sLUstruct_t *, gridinfo_t *, float *)
Definition: psGetDiagU.c:66
int_t ancestorFactor(int_t ilvl, sForest_t *sforest, commRequests_t **comReqss, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, sLUValSubBuf_t **LUvsbs, sdiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, HyP_t *HyP, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
int_t sISend_LDiagBlock(int_t k0, float *lblk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
void sger_(const int *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *)
int_t sreduceAncestors3d(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, float *Lval_buf, float *Uval_buf, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t scollect3dUpanels(int_t layer, int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void sgemv_(const char *, const int *, const int *, const float *, const float *a, const int *, const float *, const int *, const float *, float *, const int *)
trf3Dpartition_t * sinitTrf3Dpartition(int_t nsupers, superlu_dist_options_t *options, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void sinf_norm_error_dist(int_t, int_t, float *, int_t, float *, int_t, gridinfo_t *)
Check the inf-norm of the error vector.
Definition: sutil_dist.c:522
void sScalePermstructFree(sScalePermstruct_t *)
Deallocate ScalePermstruct.
Definition: sutil_dist.c:409
void sCreate_CompRowLoc_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, int_t, int_t, float *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
int sstatic_schedule(superlu_dist_options_t *, int, int, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int_t *, int_t *, int *)
Definition: sstatic_schedule.c:46
int_t checkRecvUDiag(int_t k, commRequests_t *comReqs, gridinfo_t *grid, SCT_t *SCT)
Definition: treeFactorization.c:401
float * floatCalloc_dist(int_t)
Definition: smemory_dist.c:162
int sSolveInit(superlu_dist_options_t *, SuperMatrix *, int_t[], int_t[], int_t, sLUstruct_t *, gridinfo_t *, sSOLVEstruct_t *)
Initialize the data structure for the solution phase.
Definition: psutil.c:690
int superlu_saxpy(const int n, const float alpha, const float *x, const int incx, float *y, const int incy)
int_t sblock_gemm_scatterBottomRight(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int superlu_sgemm(const char *transa, const char *transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc)
void sCopy_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
Definition: sutil_dist.c:324
void sgsequ_dist(SuperMatrix *, float *, float *, float *, float *, float *, int_t *)
Definition: sgsequ_dist.c:94
void sGenCSRLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, float **, int_t **, int_t **, int_t *, int_t *)
int_t sblock_gemm_scatterTopRight(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int sDeAllocGlu_3d(sLUstruct_t *)
Definition: sutil_dist.c:442
void psgssvx_ABglobal(superlu_dist_options_t *, SuperMatrix *, sScalePermstruct_t *, float *, int, int, gridinfo_t *, sLUstruct_t *, float *, SuperLUStat_t *, int *)
void psgstrf2_trsm(superlu_dist_options_t *options, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, MPI_Request *, int tag_ub, SuperLUStat_t *, int *info)
Definition: psgstrf2.c:143
int_t sIBcast_UPanel(int_t k, int_t k0, int_t *usub, float *uval, gridinfo_t *, int *msgcnt, MPI_Request *, int *ToSendD, int)
void sinit3DLUstructForest(int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t **sForests, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t ssparseTreeFactor(int_t nnodes, int_t *perm_c_supno, treeTopoInfo_t *treeTopoInfo, commRequests_t *comReqs, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, sLUValSubBuf_t *LUvsb, sdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info)
int strsm_(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *)
int sreduceAllAncestors3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm, sLUValSubBuf_t *LUvsb, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void sreadtriple_dist(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadtriple.c:35
int s_c2cpp_GetHWPM(SuperMatrix *, gridinfo_t *, sScalePermstruct_t *)
Definition: s_c2cpp_GetHWPM.cpp:54
void sScalePermstructInit(const int_t, const int_t, sScalePermstruct_t *)
Allocate storage in ScalePermstruct.
Definition: sutil_dist.c:398
void sscatter_l(int ib, int ljb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *usub, int_t *lsub, float *tempv, int *indirect_thread, int *indirect2, int_t **Lrowind_bc_ptr, float **Lnzval_bc_ptr, gridinfo_t *grid)
int screate_matrix_rb(SuperMatrix *, int, float **, int *, float **, int *, FILE *, gridinfo_t *)
void psgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *)
int sfreeScuBufs(sscuBufs_t *scuBufs)
int_t sdenseTreeFactor(int_t nnnodes, int_t *perm_c_supno, commRequests_t *comReqs, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, sLUValSubBuf_t *LUvsb, sdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
int_t sIBcast_LPanel(int_t k, int_t k0, int_t *lsub, float *lusup, gridinfo_t *, int *msgcnt, MPI_Request *, int **ToSendR, int_t *xsup, int)
void sreadhb_dist(int, FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadhb.c:107
int freePackLUInfo(packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:376
int_t psgstrs_init(int_t, int_t, int_t, int_t, int_t[], int_t[], gridinfo_t *grid, Glu_persist_t *, sSOLVEstruct_t *)
Definition: psutil.c:561
int_t sWait_URecv(MPI_Request *, int *msgcnt, SCT_t *)
int_t sIRecv_UDiagBlock(int_t k0, float *ublk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
int_t sinitScuBufs(int_t ldt, int_t num_threads, int_t nsupers, sscuBufs_t *, sLUstruct_t *, gridinfo_t *)
int_t sIrecv_LPanel(int_t k, int_t k0, int_t *Lsub_buf, float *Lval_buf, gridinfo_t *, MPI_Request *, sLocalLU_t *, int)
void sGenCSCLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, float **, int_t **, int_t **, int_t *, int_t *)
int superlu_strsv(char *uplo, char *trans, char *diag, int n, float *a, int lda, float *x, int incx)
void sZeroLblocks(int, int, gridinfo_t *, sLUstruct_t *)
Sets all entries of matrix L to zero.
Definition: sutil_dist.c:619
float sdistribute(fact_t, int_t, SuperMatrix *, Glu_freeable_t *, sLUstruct_t *, gridinfo_t *)
Definition: sdistribute.c:63
int_t psgstrf(superlu_dist_options_t *, int, int, float anorm, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: psgstrf.c:242
void sGenCOOLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, int_t **, int_t **, float **, int_t *, int_t *)
int_t sPackLBlock(int_t k, float *Dest, Glu_persist_t *, gridinfo_t *, sLocalLU_t *)
int_t sLPanelUpdate(int_t k, int_t *IrecvPlcd_D, int_t *factored_L, MPI_Request *, float *BlockUFactor, gridinfo_t *, sLUstruct_t *, SCT_t *)
int_t sUPanelUpdate(int_t k, int_t *factored_U, MPI_Request *, float *BlockLFactor, float *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, SCT_t *)
void Local_Sgstrf2(superlu_dist_options_t *options, int_t k, double thresh, float *BlockUFactor, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t *, int *info, SCT_t *)
int_t treeFactor(int_t nnnodes, int_t *perm_c_supno, commRequests_t *comReqs, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, sLUValSubBuf_t *LUvsb, sdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info)
int_t szSendLPanel(int_t k, int_t receiver, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void * duser_malloc_dist(int_t, int_t)
Definition: dmemory_dist.c:30
int_t LDiagBlockRecvWait(int_t k, int_t *factored_U, MPI_Request *, gridinfo_t *)
Definition: communication_aux.c:218
int_t sIRecv_LDiagBlock(int_t k0, float *L_blk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
int superlu_strsm(const char *sideRL, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *a, const int lda, float *b, const int ldb)
void sDestroy_LU(int_t, gridinfo_t *, sLUstruct_t *)
Destroy distributed L & U matrices.
Definition: psutil.c:435
int psgsmv_AXglobal(int_t, int_t[], float[], int_t[], float[], float[])
Definition: psgsmv_AXglobal.c:259
int_t sUPanelTrSolve(int_t k, float *BlockLFactor, float *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, SCT_t *)
int psgsmv_AXglobal_setup(SuperMatrix *, Glu_persist_t *, gridinfo_t *, int_t *, int_t *[], float *[], int_t *[], int_t[])
void slsum_fmod_inv_master(float *, float *, float *, float *, int, int, int_t, int *fmod, int_t, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:956
void sZero_CompRowLoc_Matrix_dist(SuperMatrix *)
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,..,n.
Definition: sutil_dist.c:339
void slsum_fmod(float *, float *, float *, float *, int, int, int_t, int *fmod, int_t, int_t, int_t, int_t *, gridinfo_t *, sLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: psgstrs_lsum.c:62
void sallocateA_dist(int_t, int_t, float **, int_t **, int_t **)
Definition: smemory_dist.c:147
int_t sIrecv_UPanel(int_t k, int_t k0, int_t *Usub_buf, float *, sLocalLU_t *, gridinfo_t *, MPI_Request *, int)
void psgsequ(SuperMatrix *, float *, float *, float *, float *, float *, int_t *, gridinfo_t *)
Definition: psgsequ.c:86
void sgather_u(int_t num_u_blks, Ublock_info_t *Ublock_info, int_t *usub, float *uval, float *bigU, int_t ldu, int_t *xsup, int_t klst)
void sLUstructInit(const int_t, sLUstruct_t *)
Allocate storage in LUstruct.
Definition: psutil.c:402
int_t sgatherFactoredLU(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, sLUValSubBuf_t *LUvsb, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void sSolveFinalize(superlu_dist_options_t *, sSOLVEstruct_t *)
Release the resources used for the solution phase.
Definition: psutil.c:778
int sscal_(const int *n, const float *alpha, float *dx, const int *incx)
int_t sSchurComplementSetupGPU(int_t k, msgs_t *msgs, packLUInfo_t *, int_t *, int_t *, int_t *, gEtreeInfo_t *, factNodelists_t *, sscuBufs_t *, sLUValSubBuf_t *LUvsb, gridinfo_t *, sLUstruct_t *, HyP_t *)
void psCompute_Diag_Inv(int_t, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: psgstrs.c:649
void sreadtriple_noheader(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadtriple_noheader.c:35
int_t sTrs2_GatherU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, float *uval, float *tempv)
Definition: psgstrf2.c:669
void slaqgs_dist(SuperMatrix *, float *, float *, float, float, float, char *)
Definition: slaqgs_dist.c:93
int_t sblock_gemm_scatterBottomLeft(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int file_Printfloat5(FILE *, char *, int_t, float *)
Definition: sutil_dist.c:555
void sCreate_Dense_Matrix_dist(SuperMatrix *, int_t, int_t, float *, int_t, Stype_t, Dtype_t, Mtype_t)
void sblock_gemm_scatter(int_t lb, int_t j, Ublock_info_t *Ublock_info, Remain_info_t *Remain_info, float *L_mat, int ldl, float *U_mat, int ldu, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int_t thread_id, int *indirect, int *indirect2, int_t **Lrowind_bc_ptr, float **Lnzval_bc_ptr, int_t **Ufstnz_br_ptr, float **Unzval_br_ptr, int_t *xsup, gridinfo_t *, SuperLUStat_t *)
int_t sUDiagBlockRecvWait(int_t k, int_t *IrecvPlcd_D, int_t *factored_L, MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *)
int strsv_(char *, char *, char *, int *, float *, int *, float *, int *)
void pxgstrs_finalize(pxgstrs_comm_t *)
Definition: util.c:266
float sdist_psymbtonum(fact_t, int_t, SuperMatrix *, sScalePermstruct_t *, Pslu_freeable_t *, sLUstruct_t *, gridinfo_t *)
Definition: pssymbfact_distdata.c:1188
int_t sLpanelUpdate(int_t off0, int_t nsupc, float *ublk_ptr, int_t ld_ujrow, float *lusup, int_t nsupr, SCT_t *)
int sldperm_dist(int, int, int_t, int_t[], int_t[], float[], int_t *, float[], float[])
Definition: sldperm_dist.c:96
int_t sinitDiagFactBufs(int_t ldt, sdiagFactBufs_t *dFBuf)
int sLluBufFreeArr(int_t numLA, sLUValSubBuf_t **LUvsbs)
sLUValSubBuf_t ** sLluBufInitArr(int_t numLA, sLUstruct_t *LUstruct)
float * sgetBigV(int_t, int_t)
int_t psReDistribute_B_to_X(float *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, float *x, sScalePermstruct_t *, Glu_persist_t *, gridinfo_t *, sSOLVEstruct_t *)
Definition: psgstrs.c:155
void sRgather_U(int_t k, int_t jj0, int_t *usub, float *uval, float *bigU, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno, int_t *perm_u)
int_t psgstrf3d(superlu_dist_options_t *, int m, int n, float anorm, trf3Dpartition_t *, SCT_t *, sLUstruct_t *, gridinfo3d_t *, SuperLUStat_t *, int *)
Definition: psgstrf3d.c:121
void psgsmv_finalize(psgsmv_comm_t *)
Definition: psgsmv.c:371
int sp_strsv_dist(char *, char *, char *, SuperMatrix *, SuperMatrix *, float *, int *)
Definition: ssp_blas2_dist.c:95
void sLUstructFree(sLUstruct_t *)
Deallocate LUstruct.
Definition: psutil.c:416
int sgemm_(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *)
int_t sTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, float *uval, float *tempv)
Definition: psgstrf2.c:694
void sCopy_CompCol_Matrix_dist(SuperMatrix *, SuperMatrix *)
int sp_sgemv_dist(char *, float, SuperMatrix *, float *, int, float, float *, int)
SpGEMV.
Definition: ssp_blas2_dist.c:391
void slsum_bmod_inv(float *, float *, float *, float *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:1355
float pslangs(char *, SuperMatrix *, gridinfo_t *)
Definition: pslangs.c:65
void sfill_dist(float *, int_t, float)
Fills a float precision array with a given value.
Definition: sutil_dist.c:512
void slsum_bmod(float *, float *, float *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, sLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: psgstrs_lsum.c:246
int_t sRecv_UDiagBlock(int_t k0, float *ublk_ptr, int_t size, int_t src, gridinfo_t *, SCT_t *, int)
int_t ssparseTreeFactor_ASYNC(sForest_t *sforest, commRequests_t **comReqss, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, sLUValSubBuf_t **LUvsbs, sdiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, HyP_t *HyP, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
int_t sSchurComplementSetup(int_t k, int *msgcnt, Ublock_info_t *, Remain_info_t *, uPanelInfo_t *, lPanelInfo_t *, int_t *, int_t *, int_t *, float *bigU, int_t *Lsub_buf, float *Lval_buf, int_t *Usub_buf, float *Uval_buf, gridinfo_t *, sLUstruct_t *)
void sRgather_L(int_t k, int_t *lsub, float *lusup, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno)
int psPermute_Dense_Matrix(int_t, int_t, int_t[], int_t[], float[], int, float[], int, int, gridinfo_t *)
Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-t...
Definition: psutil.c:291
void sscatter_u(int ib, int jb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *lsub, int_t *usub, float *tempv, int_t **Ufstnz_br_ptr, float **Unzval_br_ptr, gridinfo_t *grid)
void sScaleAdd_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *, float)
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,...
Definition: sutil_dist.c:381
void sDestroy_Tree(int_t, gridinfo_t *, sLUstruct_t *)
Destroy broadcast and reduction trees used in triangular solve.
Definition: psutil.c:854
int_t sLluBufInit(sLUValSubBuf_t *, sLUstruct_t *)
void slsum_bmod_inv_master(float *, float *, float *, float *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:1819
int_t sISend_UDiagBlock(int_t k0, float *ublk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
void duser_free_dist(int_t, int_t)
Definition: dmemory_dist.c:49
int_t sBcast_UPanel(int_t k, int_t k0, int_t *usub, float *uval, gridinfo_t *, int *msgcnt, int *ToSendD, SCT_t *, int)
int_t szSendUPanel(int_t k, int_t receiver, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *)
Definition: treeFactorization.c:422
void sScaleAddId_CompRowLoc_Matrix_dist(SuperMatrix *, float)
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,...
Definition: sutil_dist.c:356
int_t sLPanelTrSolve(int_t k, int_t *factored_L, float *BlockUFactor, gridinfo_t *, sLUstruct_t *)
int_t sWaitL(int_t k, int *msgcnt, int *msgcntU, MPI_Request *, MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *)
float slangs_dist(char *, SuperMatrix *)
Definition: slangs_dist.c:72
void sCopy_Dense_Matrix_dist(int_t, int_t, float *, int_t, float *, int_t)
int_t sWaitU(int_t k, int *msgcnt, MPI_Request *, MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *)
int_t sQuerySpace_dist(int_t, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, superlu_dist_mem_usage_t *)
Definition: smemory_dist.c:73
int_t sinit3DLUstruct(int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int sp_sgemm_dist(char *, int, float, SuperMatrix *, float *, int, float, float *, int)
Definition: ssp_blas3_dist.c:126
int screate_matrix3d(SuperMatrix *A, int nrhs, float **rhs, int *ldb, float **x, int *ldx, FILE *fp, gridinfo3d_t *grid3d)
int_t sgatherAllFactoredLU(trf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t scollect3dLpanels(int_t layer, int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
sdiagFactBufs_t ** sinitDiagFactBufsArr(int_t mxLeafNode, int_t ldt, gridinfo_t *grid)
void s3D_printMemUse(trf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: smemory_dist.c:243
float * sgetBigU(int_t, gridinfo_t *, sLUstruct_t *)
float * floatMalloc_dist(int_t)
Definition: smemory_dist.c:155
void sgather_l(int_t num_LBlk, int_t knsupc, Remain_info_t *L_info, float *lval, int_t LD_lval, float *L_buff)
void sPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *)
Print the blocks in the factored matrix U.
Definition: sutil_dist.c:750
void sFillRHS_dist(char *, int_t, float *, int_t, SuperMatrix *, float *, int_t)
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's.
Definition: sutil_dist.c:499
void sZeroUblocks(int iam, int n, gridinfo_t *, sLUstruct_t *)
Sets all entries of matrix U to zero.
Definition: sutil_dist.c:791
void psgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, Ublock_info_t *, SuperLUStat_t *)
Definition: psgstrf2.c:762
int sfreeDiagFactBufsArr(int_t mxLeafNode, sdiagFactBufs_t **dFBufs)
int_t sBcast_LPanel(int_t k, int_t k0, int_t *lsub, float *lusup, gridinfo_t *, int *msgcnt, int **ToSendR, int_t *xsup, SCT_t *, int)
int_t sp3dScatter(int_t n, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t sIBcastRecvLPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Lsub_buf, float *Lval_buf, int_t *factored, gridinfo_t *, sLUstruct_t *, SCT_t *, int tag_ub)
void psgsmv_init(SuperMatrix *, int_t *, gridinfo_t *, psgsmv_comm_t *)
Definition: psgsmv.c:27
int sScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d)
void psgsmv(int_t, SuperMatrix *, gridinfo_t *, psgsmv_comm_t *, float x[], float ax[])
Definition: psgsmv.c:235
int_t sgatherAllFactoredLUFr(int_t *myZeroTrIdxs, sForest_t *sForests, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int screate_matrix_dat(SuperMatrix *, int, float **, int *, float **, int *, FILE *, gridinfo_t *)
void psgsrfs(int_t, SuperMatrix *, float, sLUstruct_t *, sScalePermstruct_t *, gridinfo_t *, float[], int_t, float[], int_t, int, sSOLVEstruct_t *, float *, SuperLUStat_t *, int *)
int superlu_sger(const int m, const int n, const float alpha, const float *x, const int incx, const float *y, const int incy, float *a, const int lda)
void sreadMM_dist(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadMM.c:38
Mtype_t
Definition: supermatrix.h:42
Dtype_t
Definition: supermatrix.h:35
Stype_t
Definition: supermatrix.h:22
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public trans
Definition: superlupara.f90:35
integer, parameter, public factored
Definition: superlupara.f90:35
integer, parameter, public lusup
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:1142
Definition: superlu_defs.h:490
Definition: superlu_defs.h:435
Definition: superlu_ddefs.h:329
Definition: supermatrix.h:194
Definition: supermatrix.h:176
Definition: psymbfact.h:57
Definition: superlu_defs.h:770
Definition: util_dist.h:172
Definition: util_dist.h:95
Definition: supermatrix.h:54
Definition: superlu_defs.h:760
Definition: superlu_defs.h:752
Definition: superlu_defs.h:924
Definition: superlu_defs.h:937
Definition: superlu_defs.h:839
Definition: superlu_defs.h:890
Definition: superlu_defs.h:398
Definition: superlu_defs.h:388
Definition: superlu_ddefs.h:317
Definition: superlu_defs.h:947
Definition: superlu_ddefs.h:397
Definition: superlu_sdefs.h:263
void * val_tosend
Definition: superlu_sdefs.h:307
void * val_torecv
Definition: superlu_sdefs.h:308
Definition: superlu_defs.h:551
Definition: superlu_defs.h:901
Definition: superlu_sdefs.h:357
Definition: superlu_sdefs.h:254
Definition: superlu_sdefs.h:97
int64_t Unzval_bc_cnt
Definition: superlu_sdefs.h:133
int64_t * Unzval_bc_offset
Definition: superlu_sdefs.h:132
int64_t * Uindval_loc_bc_offset
Definition: superlu_sdefs.h:137
int64_t Ucolind_bc_cnt
Definition: superlu_sdefs.h:128
int_t * Uindval_loc_bc_dat
Definition: superlu_sdefs.h:136
int64_t * Ucolind_bc_offset
Definition: superlu_sdefs.h:127
int_t ** Uindval_loc_bc_ptr
Definition: superlu_sdefs.h:135
int_t ** Ucolind_bc_ptr
Definition: superlu_sdefs.h:125
int64_t Uindval_loc_bc_cnt
Definition: superlu_sdefs.h:138
int_t * Ucolind_bc_dat
Definition: superlu_sdefs.h:126
float * Unzval_bc_dat
Definition: superlu_sdefs.h:131
float ** Unzval_bc_ptr
Definition: superlu_sdefs.h:130
Definition: superlu_sdefs.h:284
Definition: superlu_sdefs.h:76
Definition: superlu_sdefs.h:391
Definition: superlu_sdefs.h:385
Definition: superlu_sdefs.h:406
sForest_t ** sForests
Definition: superlu_sdefs.h:413
int_t * myTreeIdxs
Definition: superlu_sdefs.h:410
int_t * myZeroTrIdxs
Definition: superlu_sdefs.h:411
sLUValSubBuf_t * LUvsb
Definition: superlu_sdefs.h:415
int_t * supernode2treeMap
Definition: superlu_sdefs.h:414
int_t * myNodeCount
Definition: superlu_sdefs.h:409
gEtreeInfo_t gEtreeInfo
Definition: superlu_sdefs.h:407
int_t ** treePerm
Definition: superlu_sdefs.h:412
int_t * iperm_c_supno
Definition: superlu_sdefs.h:408
Definition: superlu_defs.h:744
Definition: superlu_defs.h:712
Definition: superlu_defs.h:882
Definition: superlu_ddefs.h:308
Definitions which are precision-neutral.
#define MAX_LOOKAHEADS
Definition: superlu_sdefs.h:96