28#ifndef __SUPERLU_SDEFS
29#define __SUPERLU_SDEFS
96#define MAX_LOOKAHEADS 50
425 int_t first_l_block_acc , first_u_block_acc;
427 int_t *Lblock_dirty_bit, * Ublock_dirty_bit;
428 float *lookAhead_L_buff, *Remain_L_buff;
431 int_t num_look_aheads, nsupers;
433 int_t num_u_blks, num_u_blks_Phi;
443 int offloadCondition;
444 int superlu_acc_offload;
540 float **,
int *, FILE *,
char *,
gridinfo_t *);
548 float *,
float *,
int *);
551 float,
float,
char *);
556 float,
float,
char *);
558 float [],
int,
float [],
int,
int,
564 int,
float,
float *,
int);
566 float *,
int,
float,
float *,
int);
604 float [],
int_t *,
float [],
float []);
613 int klst,
int nbrow,
int_t lptr,
int temp_nbrow,
615 int* indirect_thread,
int* indirect2,
616 int_t ** Lrowind_bc_ptr,
float **Lnzval_bc_ptr,
619 int klst,
int nbrow,
int_t lptr,
int temp_nbrow,
621 int_t ** Ufstnz_br_ptr,
float **Unzval_br_ptr,
647extern void slsum_fmod(
float *,
float *,
float *,
float *,
651extern void slsum_bmod(
float *,
float *,
float *,
657 int,
int_t ,
int *fmod,
679extern void slsum_fmod_inv_gpu_wrap(
int,
int,
int,
int,
float *,
float *,
int,
int,
int_t ,
int *fmod,
C_Tree *,
C_Tree *,
int_t *,
int_t *, int64_t *,
float *, int64_t *,
float *, int64_t *,
int_t *, int64_t *,
int_t *,
int *,
gridinfo_t *,
680int_t , uint64_t* ,uint64_t* ,
float* ,
float* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int* ,
int);
682extern void slsum_bmod_inv_gpu_wrap(
superlu_dist_options_t *,
int,
int,
int,
int,
float *,
float *,
int,
int,
int_t ,
int *,
C_Tree *,
C_Tree *,
int_t *,
int_t *, int64_t *,
int_t *, int64_t *,
int_t *, int64_t *,
float *, int64_t *,
float *, int64_t *,
float *, int64_t *,
int_t *, int64_t *,
int_t *,
gridinfo_t *,
683 int_t, uint64_t*, uint64_t*,
float*,
float*,
684 int*,
int*,
int*,
int*,
685 int*,
int*,
int*,
int*,
int*,
686 int*,
int*,
int*,
int*,
int*,
int*,
687 int*,
int*,
int*,
int);
718 float x[],
float ax[]);
725 float * x,
float * lsum,
float * recvbuf,
726 MPI_Request * send_req,
733 float * x,
float * lsum,
sxT_struct *xT_s,
float * recvbuf,
734 MPI_Request * send_req,
759 float * x,
float * lsum,
761 float * recvbuf,
float* rtemp,
762 MPI_Request * send_req,
768 float * x,
float * lsum,
float * recvbuf,
float* rtemp,
769 MPI_Request * send_req,
797 MPI_Request send_req[],
816 MPI_Request send_req[],
835 MPI_Request send_req[],
853 MPI_Request send_req[],
867 int_t fst_row,
int nrhs,
float *x,
int_t * ilsum,
889 float *x3d,
float *lsum3d,
892 MPI_Request * send_req,
int nrhs,
898 float *x3d,
float *lsum3d,
901 MPI_Request * send_req,
int nrhs,
907 float *x3d,
float *lsum3d,
909 MPI_Request * send_req,
int nrhs,
914 float *x3d,
float *lsum3d,
916 MPI_Request * send_req,
int nrhs,
927 int** sendList, MPI_Request *send_req,
969 float [],
int_t , MPI_Comm);
991 int job,
int Equil,
int *rowequ,
int *colequ,
int *iinfo);
1035#ifdef USE_VENDOR_BLAS
1036extern void sgemm_(
const char*,
const char*,
const int*,
const int*,
const int*,
1037 const float*,
const float*,
const int*,
const float*,
1038 const int*,
const float*,
float*,
const int*,
int,
int);
1039extern void strsv_(
char*,
char*,
char*,
int*,
float*,
int*,
1040 float*,
int*,
int,
int,
int);
1041extern void strsm_(
const char*,
const char*,
const char*,
const char*,
1042 const int*,
const int*,
const float*,
const float*,
const int*,
1043 float*,
const int*,
int,
int,
int,
int);
1044extern void sgemv_(
const char *,
const int *,
const int *,
const float *,
1045 const float *a,
const int *,
const float *,
const int *,
1046 const float *,
float *,
const int *,
int);
1049extern int sgemm_(
const char*,
const char*,
const int*,
const int*,
const int*,
1050 const float*,
const float*,
const int*,
const float*,
1051 const int*,
const float*,
float*,
const int*);
1052extern int strsv_(
char*,
char*,
char*,
int*,
float*,
int*,
1054extern int strsm_(
const char*,
const char*,
const char*,
const char*,
1055 const int*,
const int*,
const float*,
const float*,
const int*,
1056 float*,
const int*);
1057extern void sgemv_(
const char *,
const int *,
const int *,
const float *,
1058 const float *a,
const int *,
const float *,
const int *,
1059 const float *,
float *,
const int *);
1062extern void sger_(
const int*,
const int*,
const float*,
1063 const float*,
const int*,
const float*,
const int*,
1064 float*,
const int*);
1066extern int sscal_(
const int *n,
const float *alpha,
float *dx,
const int *incx);
1067extern int saxpy_(
const int *n,
const float *alpha,
const float *x,
1068 const int *incx,
float *y,
const int *incy);
1072 int m,
int n,
int k,
float alpha,
float *a,
1073 int lda,
float *b,
int ldb,
float beta,
float *c,
int ldc);
1075 const char *transa,
const char *diag,
const int m,
const int n,
1076 const float alpha,
const float *a,
1077 const int lda,
float *b,
const int ldb);
1079 const float *x,
const int incx,
const float *y,
1080 const int incy,
float *a,
const int lda);
1081extern int superlu_sscal(
const int n,
const float alpha,
float *x,
const int incx);
1083 const float *x,
const int incx,
float *y,
const int incy);
1085 const int n,
const float alpha,
const float *a,
1086 const int lda,
const float *x,
const int incx,
1087 const float beta,
float *y,
const int incy);
1089 int n,
float *a,
int lda,
float *x,
int incx);
1091#ifdef SLU_HAVE_LAPACK
1092extern void strtri_(
char*,
char*,
int*,
float*,
int*,
int*);
1097 int *ldb,
float **x,
int *ldx,
1100 int *ldb,
float **x,
int *ldx,
1103 int *ldb,
float **x,
int *ldx,
1106 int nrhs,
float **rhs,
int *ldb,
float **x,
int *ldx,
1134 float *U_mat,
int ldu,
float *bigV,
1139 int *indirect,
int *indirect2,
1140 int_t **Lrowind_bc_ptr,
float **Lnzval_bc_ptr,
1141 int_t **Ufstnz_br_ptr,
float **Unzval_br_ptr,
1143#ifdef SCATTER_PROFILE
1144 ,
double *Host_TheadScatterMOP,
double *Host_TheadScatterTimer
1151sblock_gemm_scatter_lock(
int_t lb,
int_t j, omp_lock_t* lock,
1153 float *L_mat,
int_t ldl,
float *U_mat,
int_t ldu,
1159 int *indirect,
int *indirect2,
1160 int_t **Lrowind_bc_ptr,
float **Lnzval_bc_ptr,
1161 int_t **Ufstnz_br_ptr,
float **Unzval_br_ptr,
1163#ifdef SCATTER_PROFILE
1164 ,
double *Host_TheadScatterMOP,
double *Host_TheadScatterTimer
1173 int* indirect,
int* indirect2,
1181 int* indirect,
int* indirect2,
1188 int* indirect,
int* indirect2,
1195 int* indirect,
int* indirect2,
1202 float *uval,
float *bigU,
int_t ldu,
1208 float * lval,
int_t LD_lval,
1268 float* uval,
float *tempv);
1271 float* uval,
float *tempv);
1315 float* Lval_buf,
float* Uval_buf,
1347 float beta,
float* Lval_buf,
1352 float beta,
float* Uval_buf,
1358 int **ToSendR,
int_t *xsup,
int );
1364 int *ToSendD,
int );
1400 float *BlockUFactor,
float *BlockLFactor,
1401 int* IrecvPlcd_D, MPI_Request *, MPI_Request *,
1405 SCT_t *,
int tag_ub);
1410 MPI_Request *,
float* BlockUFactor,
gridinfo_t *,
1413 float* BlockLFactor,
float* bigV,
1417 MPI_Request *, MPI_Request *,
1418 int_t* Lsub_buf,
float* Lval_buf,
1420 SCT_t *,
int tag_ub);
1422 MPI_Request *,
int_t* Usub_buf,
float* Uval_buf,
1438 float *bigU,
int_t* Lsub_buf,
1439 float* Lval_buf,
int_t* Usub_buf,
1460extern int_t treeFactor(
1462 int_t *perm_c_supno,
1472 int_t * gIperm_c_supno,
1475 double thresh,
SCT_t *SCT,
1482 int_t *perm_c_supno,
1493 int_t * gIperm_c_supno,
1496 double thresh,
SCT_t *SCT,
1502 int_t *perm_c_supno,
1512 int_t * gIperm_c_supno,
1515 double thresh,
SCT_t *SCT,
int tag_ub,
1531 int_t * gIperm_c_supno,
1535 double thresh,
SCT_t *SCT,
int tag_ub,
1570extern int_t ancestorFactor(
1583 int_t * gIperm_c_supno,
1587 double thresh,
SCT_t *SCT,
int tag_ub,
int *info
1594 int nrhs,
handle_t *,
float **RHSptr,
int *ldRHS,
1595 float **ReqPtr,
float **CeqPtr,
1596 int **RpivPtr,
int **CpivPtr,
DiagScale_t *DiagScale,
1597 handle_t *F,
float **Xptr,
int *ldX,
float **Berrs,
1608 float **ReqPtr,
float **CeqPtr,
DiagScale_t *,
int **RpivPtr
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public trans
Definition: superlupara.f90:35
integer, parameter, public factored
Definition: superlupara.f90:35
integer, parameter, public lusup
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: superlu_defs.h:1256
Definition: superlu_defs.h:506
Definition: superlu_defs.h:451
Definition: superlu_defs.h:854
Definition: psymbfact.h:57
Definition: superlu_defs.h:799
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: supermatrix.h:54
Definition: superlu_defs.h:789
Definition: superlu_defs.h:781
Definition: superlu_defs.h:1012
Definition: superlu_defs.h:1025
Definition: superlu_defs.h:927
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:834
Definition: superlu_defs.h:1034
Definition: superlu_defs.h:844
Definition: superlu_sdefs.h:350
void * val_tosend
Definition: superlu_sdefs.h:362
int_t * ind_torecv
Definition: superlu_sdefs.h:353
int_t TotalValSend
Definition: superlu_sdefs.h:366
int_t TotalIndSend
Definition: superlu_sdefs.h:364
int_t * ptr_ind_torecv
Definition: superlu_sdefs.h:356
int_t * extern_start
Definition: superlu_sdefs.h:351
int_t * ptr_ind_tosend
Definition: superlu_sdefs.h:354
void * val_torecv
Definition: superlu_sdefs.h:363
int * RecvCounts
Definition: superlu_sdefs.h:360
int_t * ind_tosend
Definition: superlu_sdefs.h:352
int * SendCounts
Definition: superlu_sdefs.h:358
Definition: superlu_defs.h:567
Definition: superlu_defs.h:989
Definition: superlu_sdefs.h:310
float * Lval_buf
Definition: superlu_sdefs.h:312
int_t * Usub_buf
Definition: superlu_sdefs.h:313
int_t * Lsub_buf
Definition: superlu_sdefs.h:311
float * Uval_buf
Definition: superlu_sdefs.h:314
Definition: superlu_sdefs.h:340
int_t * etree
Definition: superlu_sdefs.h:341
sLocalLU_t * Llu
Definition: superlu_sdefs.h:343
char dt
Definition: superlu_sdefs.h:345
Glu_persist_t * Glu_persist
Definition: superlu_sdefs.h:342
strf3Dpartition_t * trf3Dpart
Definition: superlu_sdefs.h:344
Definition: superlu_sdefs.h:97
int * bcols_masked
Definition: superlu_sdefs.h:224
int64_t * Unzval_br_new_offset
Definition: superlu_sdefs.h:152
float ** Unzval_br_new_ptr
Definition: superlu_sdefs.h:150
long int * Linv_bc_offset
Definition: superlu_sdefs.h:113
int_t * ut_modbit
Definition: superlu_sdefs.h:241
C_Tree * LBtree_ptr
Definition: superlu_sdefs.h:175
int_t * Uind_br_dat
Definition: superlu_sdefs.h:141
int64_t * Uind_br_offset
Definition: superlu_sdefs.h:142
int_t * d_Lrowind_bc_dat
Definition: superlu_sdefs.h:262
float ** Lnzval_bc_ptr
Definition: superlu_sdefs.h:104
int * ToSendD
Definition: superlu_sdefs.h:200
int_t ** Ufstnz_br_ptr
Definition: superlu_sdefs.h:164
int_t * Unnz
Definition: superlu_sdefs.h:157
int * d_bcols_masked
Definition: superlu_sdefs.h:285
long int * Lrowind_bc_offset
Definition: superlu_sdefs.h:101
long int * Ucb_indoffset
Definition: superlu_sdefs.h:245
long int * Uinv_bc_offset
Definition: superlu_sdefs.h:161
int_t SolveMsgVol
Definition: superlu_sdefs.h:223
int nfrecvx
Definition: superlu_sdefs.h:208
int64_t Unzval_bc_cnt
Definition: superlu_sdefs.h:133
int_t SolveMsgSent
Definition: superlu_sdefs.h:222
int_t * d_xsup
Definition: superlu_sdefs.h:299
int_t L_SOLVE
Definition: superlu_sdefs.h:231
int64_t * Unzval_bc_offset
Definition: superlu_sdefs.h:132
int nleaf
Definition: superlu_sdefs.h:216
long int * d_Lrowind_bc_offset
Definition: superlu_sdefs.h:263
int_t ** Ucb_valptr
Definition: superlu_sdefs.h:248
int ** fsendx_plist
Definition: superlu_sdefs.h:206
int * bmod
Definition: superlu_sdefs.h:210
int_t ldalsum
Definition: superlu_sdefs.h:221
int_t ** Lrowind_bc_ptr
Definition: superlu_sdefs.h:98
int64_t * Uindval_loc_bc_offset
Definition: superlu_sdefs.h:137
C_Tree * d_LRtree_ptr
Definition: superlu_sdefs.h:301
int64_t * d_Uindval_loc_bc_offset
Definition: superlu_sdefs.h:284
long int * Lindval_loc_bc_offset
Definition: superlu_sdefs.h:121
long int Unzval_br_cnt
Definition: superlu_sdefs.h:172
int_t * Lindval_loc_bc_dat
Definition: superlu_sdefs.h:120
float ** Unzval_br_ptr
Definition: superlu_sdefs.h:169
int_t * utrecv
Definition: superlu_sdefs.h:237
int nfsendx
Definition: superlu_sdefs.h:209
int_t * d_Ucolind_br_dat
Definition: superlu_sdefs.h:272
int64_t * Ucolind_br_offset
Definition: superlu_sdefs.h:147
int64_t * d_Ucolind_br_offset
Definition: superlu_sdefs.h:273
int64_t * d_Ucolind_bc_offset
Definition: superlu_sdefs.h:267
float * d_Unzval_bc_dat
Definition: superlu_sdefs.h:270
float * d_Uinv_bc_dat
Definition: superlu_sdefs.h:278
int_t ** ut_sendx_plist
Definition: superlu_sdefs.h:236
int * mod_bit
Definition: superlu_sdefs.h:215
long int Ucb_valcnt
Definition: superlu_sdefs.h:251
long int Uinv_bc_cnt
Definition: superlu_sdefs.h:162
long int * Lnzval_bc_offset
Definition: superlu_sdefs.h:107
float * d_Unzval_br_new_dat
Definition: superlu_sdefs.h:274
int64_t Ucolind_bc_cnt
Definition: superlu_sdefs.h:128
int_t * d_ilsum
Definition: superlu_sdefs.h:298
long int * d_Lnzval_bc_offset
Definition: superlu_sdefs.h:265
float * Uinv_bc_dat
Definition: superlu_sdefs.h:160
int64_t * d_Uind_br_offset
Definition: superlu_sdefs.h:269
int_t * Ucb_valdat
Definition: superlu_sdefs.h:249
int * frecv
Definition: superlu_sdefs.h:207
int_t * d_Uind_br_dat
Definition: superlu_sdefs.h:268
int_t inv
Definition: superlu_sdefs.h:256
Ucb_indptr_t ** Ucb_indptr
Definition: superlu_sdefs.h:243
int_t * Ufstnz_br_dat
Definition: superlu_sdefs.h:165
int_t * d_Ucolind_bc_dat
Definition: superlu_sdefs.h:266
int_t * utmod
Definition: superlu_sdefs.h:235
int64_t Unzval_br_new_cnt
Definition: superlu_sdefs.h:153
int * fmod
Definition: superlu_sdefs.h:205
long int * d_Unzval_bc_offset
Definition: superlu_sdefs.h:271
int_t n_utsendx
Definition: superlu_sdefs.h:238
int * brecv
Definition: superlu_sdefs.h:212
int_t * Uindval_loc_bc_dat
Definition: superlu_sdefs.h:136
float * Unzval_br_dat
Definition: superlu_sdefs.h:170
int64_t * d_Unzval_br_new_offset
Definition: superlu_sdefs.h:275
int64_t Ucolind_br_cnt
Definition: superlu_sdefs.h:148
long int * Ufstnz_br_offset
Definition: superlu_sdefs.h:166
C_Tree * d_UBtree_ptr
Definition: superlu_sdefs.h:302
int_t n
Definition: superlu_sdefs.h:254
long int * Ucb_valoffset
Definition: superlu_sdefs.h:250
int nbcol_masked
Definition: superlu_sdefs.h:257
int nbsendx
Definition: superlu_sdefs.h:214
int_t ** Ucolind_br_ptr
Definition: superlu_sdefs.h:145
int ** bsendx_plist
Definition: superlu_sdefs.h:211
C_Tree * LRtree_ptr
Definition: superlu_sdefs.h:176
float ** Linv_bc_ptr
Definition: superlu_sdefs.h:110
int_t ut_ldalsum
Definition: superlu_sdefs.h:233
C_Tree * URtree_ptr
Definition: superlu_sdefs.h:178
int_t * d_Lindval_loc_bc_dat
Definition: superlu_sdefs.h:281
int_t n_utrecvx
Definition: superlu_sdefs.h:239
int64_t Uind_br_cnt
Definition: superlu_sdefs.h:143
Ucb_indptr_t * Ucb_inddat
Definition: superlu_sdefs.h:244
gridinfo_t * d_grid
Definition: superlu_sdefs.h:304
int_t n_utrecvmod
Definition: superlu_sdefs.h:240
int_t * Urbs
Definition: superlu_sdefs.h:242
int_t * Lrowind_bc_dat
Definition: superlu_sdefs.h:100
float * Unzval_br_new_dat
Definition: superlu_sdefs.h:151
int64_t * Ucolind_bc_offset
Definition: superlu_sdefs.h:127
float ** Uinv_bc_ptr
Definition: superlu_sdefs.h:159
float * Linv_bc_dat
Definition: superlu_sdefs.h:112
int ** ToSendR
Definition: superlu_sdefs.h:201
int_t ** Uindval_loc_bc_ptr
Definition: superlu_sdefs.h:135
long int Lnzval_bc_cnt
Definition: superlu_sdefs.h:108
int_t ** Ucolind_bc_ptr
Definition: superlu_sdefs.h:125
float * d_Linv_bc_dat
Definition: superlu_sdefs.h:277
int_t ** Lindval_loc_bc_ptr
Definition: superlu_sdefs.h:116
int_t * Ucolind_br_dat
Definition: superlu_sdefs.h:146
int_t ** Uind_br_ptr
Definition: superlu_sdefs.h:140
C_Tree * UBtree_ptr
Definition: superlu_sdefs.h:177
C_Tree * d_LBtree_ptr
Definition: superlu_sdefs.h:300
int_t FRECV
Definition: superlu_sdefs.h:232
int_t UT_SOLVE
Definition: superlu_sdefs.h:230
int64_t Uindval_loc_bc_cnt
Definition: superlu_sdefs.h:138
long int Ucb_indcnt
Definition: superlu_sdefs.h:246
C_Tree * d_URtree_ptr
Definition: superlu_sdefs.h:303
long int * Unzval_br_offset
Definition: superlu_sdefs.h:171
float * Lnzval_bc_dat
Definition: superlu_sdefs.h:106
long int Ufstnz_br_cnt
Definition: superlu_sdefs.h:167
int_t * ut_ilsum
Definition: superlu_sdefs.h:234
int nbrecvx
Definition: superlu_sdefs.h:213
int_t nfrecvmod
Definition: superlu_sdefs.h:255
int nroot
Definition: superlu_sdefs.h:217
int_t * Ucolind_bc_dat
Definition: superlu_sdefs.h:126
long int Linv_bc_cnt
Definition: superlu_sdefs.h:114
long int Lrowind_bc_cnt
Definition: superlu_sdefs.h:102
long int * d_Linv_bc_offset
Definition: superlu_sdefs.h:279
int_t ** Lrowind_bc_2_lsum
Definition: superlu_sdefs.h:158
long int * d_Uinv_bc_offset
Definition: superlu_sdefs.h:280
int_t * ilsum
Definition: superlu_sdefs.h:219
long int Lindval_loc_bc_cnt
Definition: superlu_sdefs.h:122
int * ToRecv
Definition: superlu_sdefs.h:199
int64_t * d_Lindval_loc_bc_offset
Definition: superlu_sdefs.h:282
int_t * d_Uindval_loc_bc_dat
Definition: superlu_sdefs.h:283
float * Unzval_bc_dat
Definition: superlu_sdefs.h:131
float * ujrow
Definition: superlu_sdefs.h:189
float * d_Lnzval_bc_dat
Definition: superlu_sdefs.h:264
float ** Unzval_bc_ptr
Definition: superlu_sdefs.h:130
Definition: superlu_sdefs.h:371
int_t * inv_perm_c
Definition: superlu_sdefs.h:373
int * d_bmod
Definition: superlu_sdefs.h:390
int * d_fmod
Definition: superlu_sdefs.h:389
psgsmv_comm_t * gsmv_comm
Definition: superlu_sdefs.h:375
int_t * diag_len
Definition: superlu_sdefs.h:374
pxgstrs_comm_t * gstrs_comm
Definition: superlu_sdefs.h:377
int_t * row_to_proc
Definition: superlu_sdefs.h:372
NRformat_loc3d * A3d
Definition: superlu_sdefs.h:383
int_t * xrow_to_proc
Definition: superlu_sdefs.h:382
float * d_x
Definition: superlu_sdefs.h:388
float * d_lsum
Definition: superlu_sdefs.h:387
int_t * A_colind_gsmv
Definition: superlu_sdefs.h:378
Definition: superlu_sdefs.h:76
float * R
Definition: superlu_sdefs.h:78
DiagScale_t DiagScale
Definition: superlu_sdefs.h:77
float * C
Definition: superlu_sdefs.h:79
int_t * perm_c
Definition: superlu_sdefs.h:81
int_t * perm_r
Definition: superlu_sdefs.h:80
Definition: superlu_sdefs.h:467
float * BlockUFactor
Definition: superlu_sdefs.h:469
float * BlockLFactor
Definition: superlu_sdefs.h:468
Definition: superlu_sdefs.h:481
float * tX
Definition: superlu_sdefs.h:482
float * tU
Definition: superlu_sdefs.h:483
int_t * indCols
Definition: superlu_sdefs.h:484
Definition: superlu_sdefs.h:461
float * bigV
Definition: superlu_sdefs.h:463
float * bigU
Definition: superlu_sdefs.h:462
Definition: superlu_sdefs.h:318
sForest_t ** sForests
Definition: superlu_sdefs.h:326
int * supernodeMask
Definition: superlu_sdefs.h:328
int_t * myTreeIdxs
Definition: superlu_sdefs.h:323
int_t nsupers
Definition: superlu_sdefs.h:319
int_t * myZeroTrIdxs
Definition: superlu_sdefs.h:324
sLUValSubBuf_t * LUvsb
Definition: superlu_sdefs.h:329
SupernodeToGridMap_t * superGridMap
Definition: superlu_sdefs.h:330
int_t * supernode2treeMap
Definition: superlu_sdefs.h:327
int_t * myNodeCount
Definition: superlu_sdefs.h:322
int maxLvl
Definition: superlu_sdefs.h:331
gEtreeInfo_t gEtreeInfo
Definition: superlu_sdefs.h:320
int_t ** treePerm
Definition: superlu_sdefs.h:325
int * diagDims
Definition: superlu_sdefs.h:335
int * gemmCsizes
Definition: superlu_sdefs.h:336
int_t * iperm_c_supno
Definition: superlu_sdefs.h:321
int mxLeafNode
Definition: superlu_sdefs.h:334
Definition: superlu_defs.h:773
Definition: superlu_defs.h:728
Definition: superlu_sdefs.h:474
float * xT
Definition: superlu_sdefs.h:475
int_t * ilsumT
Definition: superlu_sdefs.h:477
int_t ldaspaT
Definition: superlu_sdefs.h:476
Definition: superlu_defs.h:970
Definition: superlu_defs.h:825
Definition: superlu_defs.h:1040
Definitions which are precision-neutral.
SupernodeToGridMap_t
Definition: superlu_defs.h:1291
trtype_t
Definition: superlu_defs.h:1317
int64_t handle_t
Definition: superlu_defs.h:348
#define NBUFFERS
Definition: superlu_defs.h:205
int64_t int_t
Definition: superlu_defs.h:119
DiagScale_t
Definition: superlu_enum_consts.h:35
fact_t
Definition: superlu_enum_consts.h:30
int screate_matrix_postfix(SuperMatrix *, int, float **, int *, float **, int *, FILE *, char *, gridinfo_t *)
Definition: screate_matrix.c:76
void psgsrfs_ABXglobal(superlu_dist_options_t *, int_t, SuperMatrix *, float, sLUstruct_t *, gridinfo_t *, float *, int_t, float *, int_t, int, float *, SuperLUStat_t *, int *)
Definition: psgsrfs_ABXglobal.c:125
int_t szRecvLPanel(int_t k, int_t sender, float alpha, float beta, float *Lval_buf, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void Printfloat5(char *, int_t, float *)
Definition: sutil_dist.c:614
void psgssvx(superlu_dist_options_t *, SuperMatrix *, sScalePermstruct_t *, float *, int, int, gridinfo_t *, sLUstruct_t *, sSOLVEstruct_t *, float *, SuperLUStat_t *, int *)
int_t initPackLUInfo(int_t nsupers, packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:168
int screate_matrix(SuperMatrix *, int, float **, int *, float **, int *, FILE *, gridinfo_t *)
Definition: screate_matrix.c:348
int screate_matrix_postfix3d(SuperMatrix *A, int nrhs, float **rhs, int *ldb, float **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: screate_matrix3d.c:72
void sGatherNRformat_loc3d_allgrid(fact_t Fact, NRformat_loc *A, float *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
int_t sTrs2_GatherTrsmScatter(int_t klst, int_t iukp, int_t rukp, int_t *usub, float *uval, float *tempv, int_t knsupc, int nsupr, float *lusup, Glu_persist_t *Glu_persist)
Definition: psgstrf2.c:804
void sDestroy_A3d_gathered_on_2d(sSOLVEstruct_t *, gridinfo3d_t *)
Definition: psutil.c:1244
int_t psReDistribute3d_X_to_B(int_t n, float *B, int_t m_loc, int_t ldb, int_t fst_row, int nrhs, float *x, int_t *ilsum, sScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, sSOLVEstruct_t *SOLVEstruct)
Definition: psgstrs3d.c:6408
int_t psgsTrForwardSolve3d(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x3d, float *lsum3d, sxT_struct *xT_s, float *recvbuf, MPI_Request *send_req, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:7312
int updateDirtyBit(int_t k0, HyP_t *HyP, gridinfo_t *grid)
Definition: sec_structs.c:651
int superlu_sgemv(const char *trans, const int m, const int n, const float alpha, const float *a, const int lda, const float *x, const int incx, const float beta, float *y, const int incy)
float psdistribute3d_Yang(superlu_dist_options_t *options, int_t n, SuperMatrix *A, sScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: psdistribute3d.c:25
int_t sbroadcastAncestor3d(strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void sgstrf2(int_t k, float *diagBlk, int_t LDA, float *BlockUfactor, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
Definition: psgstrf2.c:404
void sCreate_CompCol_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, float *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
int_t sgatherAllFactoredLU(strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t sIBcastRecvLPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Lsub_buf, float *Lval_buf, int *factored, gridinfo_t *, sLUstruct_t *, SCT_t *, int tag_ub)
void psgstrs_Bglobal(superlu_dist_options_t *, int_t, sLUstruct_t *, gridinfo_t *, float *, int_t, int, SuperLUStat_t *, int *)
Definition: psgstrs_Bglobal.c:108
int_t sscatter3dUPanels(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
void psgsrfs(superlu_dist_options_t *, int_t, SuperMatrix *, float, sLUstruct_t *, sScalePermstruct_t *, gridinfo_t *, float[], int_t, float[], int_t, int, sSOLVEstruct_t *, float *, SuperLUStat_t *, int *)
int saxpy_(const int *n, const float *alpha, const float *x, const int *incx, float *y, const int *incy)
int sPrint_CompRowLoc_Matrix_dist(SuperMatrix *)
int_t sWait_LRecv(MPI_Request *, int *msgcnt, int *msgcntsU, gridinfo_t *, SCT_t *)
int superlu_sscal(const int n, const float alpha, float *x, const int incx)
int_t slasum_bmod_Tree(int_t pTree, int_t cTree, float *lsum, float *x, sxT_struct *xT_s, int nrhs, slsumBmod_buff_t *lbmod_buf, sLUstruct_t *LUstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat)
Definition: psgstrs3d.c:3892
void sGatherNRformat_loc3d(fact_t Fact, NRformat_loc *A, float *B, int ldb, int nrhs, gridinfo3d_t *grid3d, NRformat_loc3d **)
int psCompRow_loc_to_CompCol_global(int_t, SuperMatrix *, gridinfo_t *, SuperMatrix *)
Gather A from the distributed compressed row format to global A in compressed column format.
Definition: psutil.c:35
void slsum_bmod_inv_gpu_wrap(superlu_dist_options_t *, int, int, int, int, float *, float *, int, int, int_t, int *, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, int_t *, int64_t *, int_t *, int64_t *, float *, int64_t *, float *, int64_t *, float *, int64_t *, int_t *, int64_t *, int_t *, gridinfo_t *, int_t, uint64_t *, uint64_t *, float *, float *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int)
void pslaqgs(SuperMatrix *, float *, float *, float, float, float, char *)
Definition: pslaqgs.c:85
int psgsmv_AXglobal_abs(int_t, int_t[], float[], int_t[], float[], float[])
Definition: psgsmv_AXglobal.c:285
void sCreate_SuperNode_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, float *, int_t *, int_t *, int_t *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
int_t szRecvUPanel(int_t k, int_t sender, float alpha, float beta, float *Uval_buf, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
int_t sinitLsumBmod_buff(int_t ns, int nrhs, slsumBmod_buff_t *lbmod_buf)
Definition: psgstrs3d.c:3925
void psgssvx3d(superlu_dist_options_t *, SuperMatrix *, sScalePermstruct_t *, float B[], int ldb, int nrhs, gridinfo3d_t *, sLUstruct_t *, sSOLVEstruct_t *, float *berr, SuperLUStat_t *, int *info)
Definition: psgssvx3d.c:710
void psconvertU(superlu_dist_options_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, int)
int_t sDiagFactIBCast(int_t k, int_t k0, float *BlockUFactor, float *BlockLFactor, int *IrecvPlcd_D, MPI_Request *, MPI_Request *, MPI_Request *, MPI_Request *, gridinfo_t *, superlu_dist_options_t *, double thresh, sLUstruct_t *LUstruct, SuperLUStat_t *, int *info, SCT_t *, int tag_ub)
int_t sLPanelTrSolve(int_t k, int *factored_L, float *BlockUFactor, gridinfo_t *, sLUstruct_t *)
int_t sAllocLlu_3d(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t scuStatUpdate(int_t knsupc, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *stat)
Definition: sec_structs.c:668
void sCompRow_to_CompCol_dist(int_t, int_t, int_t, float *, int_t *, int_t *, float **, int_t **, int_t **)
void psgstrs2(int_t m, int_t k0, int_t k, Glu_persist_t *Glu_persist, gridinfo_t *grid, sLocalLU_t *Llu, SuperLUStat_t *stat)
void sbcastPermutedSparseA(SuperMatrix *A, sScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: s3DPartition.c:187
void psinf_norm_error(int, int_t, int_t, float[], int_t, float[], int_t, MPI_Comm)
Check the inf-norm of the error vector.
Definition: psutil.c:1274
void psgstrs3d(superlu_dist_options_t *, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info)
Definition: psgstrs3d.c:6608
int_t sp3dCollect(int_t layer, int_t n, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
float sdistribute(superlu_dist_options_t *, int_t, SuperMatrix *, Glu_freeable_t *, sLUstruct_t *, gridinfo_t *)
Definition: sdistribute.c:67
void sreadrb_dist(int, FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadrb.c:275
void sPrint_Dense_Matrix_dist(SuperMatrix *)
int_t sLPanelUpdate(int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, float *BlockUFactor, gridinfo_t *, sLUstruct_t *, SCT_t *)
void validateInput_psgssvx3d(superlu_dist_options_t *, SuperMatrix *A, int ldb, int nrhs, gridinfo3d_t *, int *info)
Validates the input parameters for a given problem.
Definition: sssvx3dAux.c:24
int sAllocGlu_3d(int_t n, int_t nsupers, sLUstruct_t *)
Definition: sutil_dist.c:471
void sGenXtrue_dist(int_t, int_t, float *, int_t)
Definition: sutil_dist.c:525
void sClone_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
int getNsupers(int, Glu_persist_t *)
Definition: trfAux.c:42
void slsum_fmod_inv(float *, float *, float *, float *, int, int_t, int *fmod, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:416
int sDeAllocLlu_3d(int_t n, sLUstruct_t *, gridinfo3d_t *)
Definition: sutil_dist.c:489
float * sgetBigU(superlu_dist_options_t *, int_t, gridinfo_t *, sLUstruct_t *)
int sread_binary(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sbinary_io.c:4
void sComputeLevelsets(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, int_t *)
float psdistribute(superlu_dist_options_t *, int_t, SuperMatrix *, sScalePermstruct_t *, Glu_freeable_t *, sLUstruct_t *, gridinfo_t *)
Definition: psdistribute.c:329
strf3Dpartition_t * sinitTrf3DpartitionLUstructgrid0(int_t n, superlu_dist_options_t *options, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
sdiagFactBufs_t ** sinitDiagFactBufsArr(int mxLeafNode, int ldt, gridinfo_t *grid)
void sPrint_CompCol_Matrix_dist(SuperMatrix *)
int_t szeroSetLU(int_t nnodes, int_t *nodeList, sLUstruct_t *, gridinfo3d_t *)
void * suser_malloc_dist(int_t, int_t)
Definition: smemory_dist.c:30
void sPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *)
Print the blocks in the factored matrix L.
Definition: sutil_dist.c:730
int_t sblock_gemm_scatterTopLeft(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int_t sIBcastRecvUPanel(int_t k, int_t k0, int *msgcnt, MPI_Request *, MPI_Request *, int_t *Usub_buf, float *Uval_buf, gridinfo_t *, sLUstruct_t *, SCT_t *, int tag_ub)
int file_sPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A)
void slsum_bmod_GG_newsolve(strf3Dpartition_t *trf3Dpartition, float *lsum, float *x, float *xk, int nrhs, slsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, sLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:5872
void Free_HyP(HyP_t *HyP)
Definition: sec_structs.c:627
void psGetDiagU(int_t, sLUstruct_t *, gridinfo_t *, float *)
Definition: psGetDiagU.c:66
int_t sreduceSolvedX_newsolve(int_t treeId, int_t sender, int_t receiver, float *x, int nrhs, strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, float *recvbuf, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1493
int_t psgstrf3d(superlu_dist_options_t *, int m, int n, float anorm, strf3Dpartition_t *, SCT_t *, sLUstruct_t *, gridinfo3d_t *, SuperLUStat_t *, int *)
Definition: psgstrf3d.c:121
int_t sISend_LDiagBlock(int_t k0, float *lblk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
void sger_(const int *, const int *, const float *, const float *, const int *, const float *, const int *, float *, const int *)
int_t sreduceAncestors3d(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, float *Lval_buf, float *Uval_buf, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
float * sready_lsum
Definition: psgstrs3d.c:150
int_t scollect3dUpanels(int_t layer, int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void sgemv_(const char *, const int *, const int *, const float *, const float *a, const int *, const float *, const int *, const float *, float *, const int *)
void sinf_norm_error_dist(int_t, int_t, float *, int_t, float *, int_t, gridinfo_t *)
Check the inf-norm of the error vector.
Definition: sutil_dist.c:593
void sScalePermstructFree(sScalePermstruct_t *)
Deallocate ScalePermstruct.
Definition: sutil_dist.c:448
int_t sp3dScatter(int_t n, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
void sCreate_CompRowLoc_Matrix_dist(SuperMatrix *, int_t, int_t, int_t, int_t, int_t, float *, int_t *, int_t *, Stype_t, Dtype_t, Mtype_t)
int sstatic_schedule(superlu_dist_options_t *, int, int, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int_t *, int_t *, int *)
Definition: sstatic_schedule.c:46
void sscaleMatrixDiagonally(fact_t Fact, sScalePermstruct_t *, SuperMatrix *, SuperLUStat_t *, gridinfo_t *, int *rowequ, int *colequ, int *iinfo)
Definition: sssvx3dAux.c:162
void snewTrfPartitionInit(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: s3DPartition.c:5
int_t psgsTrBackSolve3d_newsolve(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x3d, float *lsum3d, float *recvbuf, MPI_Request *send_req, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:7692
int_t checkRecvUDiag(int_t k, commRequests_t *comReqs, gridinfo_t *grid, SCT_t *SCT)
Definition: treeFactorization.c:205
float * floatCalloc_dist(int_t)
Definition: smemory_dist.c:162
int sSolveInit(superlu_dist_options_t *, SuperMatrix *, int_t[], int_t[], int_t, sLUstruct_t *, gridinfo_t *, sSOLVEstruct_t *)
Initialize the data structure for the solution phase.
Definition: psutil.c:1108
int superlu_saxpy(const int n, const float alpha, const float *x, const int incx, float *y, const int incy)
int_t sblock_gemm_scatterBottomRight(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int superlu_sgemm(const char *transa, const char *transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc)
void sCopy_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *)
Definition: sutil_dist.c:363
float sMaxAbsLij(int iam, int n, Glu_persist_t *, sLUstruct_t *, gridinfo_t *)
Find max(abs(L(i,j)))
Definition: sutil_dist.c:641
float scomputeA_Norm(int notran, SuperMatrix *, gridinfo_t *)
This function computes the norm of a matrix A.
Definition: sssvx3dAux.c:563
void sGenCSRLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, float **, int_t **, int_t **, int_t *, int_t *)
int_t sblock_gemm_scatterTopRight(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int sDeAllocGlu_3d(sLUstruct_t *)
Definition: sutil_dist.c:481
void psgssvx_ABglobal(superlu_dist_options_t *, SuperMatrix *, sScalePermstruct_t *, float *, int, int, gridinfo_t *, sLUstruct_t *, float *, SuperLUStat_t *, int *)
void psgstrf2_trsm(superlu_dist_options_t *options, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, MPI_Request *, int tag_ub, SuperLUStat_t *, int *info)
Definition: psgstrf2.c:143
int_t sIBcast_UPanel(int_t k, int_t k0, int_t *usub, float *uval, gridinfo_t *, int *msgcnt, MPI_Request *, int *ToSendD, int)
void sinit3DLUstructForest(int_t *myTreeIdxs, int_t *myZeroTrIdxs, sForest_t **sForests, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t ssparseTreeFactor(int_t nnodes, int_t *perm_c_supno, treeTopoInfo_t *treeTopoInfo, commRequests_t *comReqs, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, sLUValSubBuf_t *LUvsb, sdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int *info)
void slsum_bmod_GG(float *lsum, float *x, float *xk, int nrhs, slsumBmod_buff_t *lbmod_buf, int_t k, int *bmod, int_t *Urbs, Ucb_indptr_t **Ucb_indptr, int_t **Ucb_valptr, int_t *xsup, gridinfo_t *grid, sLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:5687
int strsm_(const char *, const char *, const char *, const char *, const int *, const int *, const float *, const float *, const int *, float *, const int *)
int sreduceAllAncestors3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm, sLUValSubBuf_t *LUvsb, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void sreadtriple_dist(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadtriple.c:35
int_t slsumForestBsolve(int_t k, int_t treeId, float *lsum, float *x, sxT_struct *xT_s, int nrhs, slsumBmod_buff_t *lbmod_buf, sLUstruct_t *LUstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, SuperLUStat_t *stat)
Definition: psgstrs3d.c:4045
int s_c2cpp_GetHWPM(SuperMatrix *, gridinfo_t *, sScalePermstruct_t *)
Definition: s_c2cpp_GetHWPM.cpp:55
void sScalePermstructInit(const int_t, const int_t, sScalePermstruct_t *)
Allocate storage in ScalePermstruct.
Definition: sutil_dist.c:437
void sscatter_l(int ib, int ljb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *usub, int_t *lsub, float *tempv, int *indirect_thread, int *indirect2, int_t **Lrowind_bc_ptr, float **Lnzval_bc_ptr, gridinfo_t *grid)
int_t psgstrs_delete_device_lsum_x(sSOLVEstruct_t *)
Definition: psutil.c:1067
float psdistribute_allgrid(superlu_dist_options_t *options, int_t n, SuperMatrix *A, sScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, sLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask)
Definition: psdistribute.c:2331
int screate_matrix_rb(SuperMatrix *, int, float **, int *, float **, int *, FILE *, gridinfo_t *)
int_t sfsolveReduceLsum3d(int_t treeId, int_t sender, int_t receiver, float *lsum, float *recvbuf, int nrhs, strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1647
void psgstrf2(superlu_dist_options_t *, int_t nsupers, int_t k0, int_t k, double thresh, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *)
int sfreeScuBufs(sscuBufs_t *scuBufs)
int_t sdenseTreeFactor(int_t nnnodes, int_t *perm_c_supno, commRequests_t *comReqs, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t *msgs, sLUValSubBuf_t *LUvsb, sdiagFactBufs_t *dFBuf, factStat_t *factStat, factNodelists_t *fNlists, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
int_t sIBcast_LPanel(int_t k, int_t k0, int_t *lsub, float *lusup, gridinfo_t *, int *msgcnt, MPI_Request *, int **ToSendR, int_t *xsup, int)
void sreadhb_dist(int, FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadhb.c:107
int freePackLUInfo(packLUInfo_t *packLUInfo)
Definition: treeFactorization.c:177
int sinitDiagFactBufs(int ldt, sdiagFactBufs_t *dFBuf)
int_t psgstrs_init(int_t, int_t, int_t, int_t, int_t[], int_t[], gridinfo_t *grid, Glu_persist_t *, sSOLVEstruct_t *)
Definition: psutil.c:650
int_t sWait_URecv(MPI_Request *, int *msgcnt, SCT_t *)
int_t sbsolve_Xt_bcast(int_t ilvl, sxT_struct *xT_s, int nrhs, strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1720
sdiagFactBufs_t ** sinitDiagFactBufsArrMod(int mxLeafNode, int *ldts, gridinfo_t *grid)
void nv_init_wrapper(MPI_Comm)
int_t sIRecv_UDiagBlock(int_t k0, float *ublk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
void s3D_printMemUse(strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: smemory_dist.c:243
int_t sIrecv_LPanel(int_t k, int_t k0, int_t *Lsub_buf, float *Lval_buf, gridinfo_t *, MPI_Request *, sLocalLU_t *, int)
int_t snonLeafForestForwardSolve3d(int_t treeId, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x, float *lsum, sxT_struct *xT_s, float *recvbuf, float *rtemp, MPI_Request *send_req, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1900
void sGenCSCLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, float **, int_t **, int_t **, int_t *, int_t *)
int_t psgsTrForwardSolve3d_newsolve(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x3d, float *lsum3d, float *recvbuf, MPI_Request *send_req, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:7458
int superlu_strsv(char *uplo, char *trans, char *diag, int n, float *a, int lda, float *x, int incx)
void sZeroLblocks(int, int, gridinfo_t *, sLUstruct_t *)
Sets all entries of matrix L to zero.
Definition: sutil_dist.c:779
int_t psgstrf(superlu_dist_options_t *, int, int, float anorm, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: psgstrf.c:243
int sfreeDiagFactBufsArr(int mxLeafNode, sdiagFactBufs_t **dFBufs)
int_t sp2pSolvedX3d(int_t treeId, int_t sender, int_t receiver, float *x, int nrhs, strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1597
void sGenCOOLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *, int_t **, int_t **, float **, int_t *, int_t *)
int_t sPackLBlock(int_t k, float *Dest, Glu_persist_t *, gridinfo_t *, sLocalLU_t *)
strf3Dpartition_t * sinitTrf3Dpartition(int_t nsupers, superlu_dist_options_t *options, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
void Local_Sgstrf2(superlu_dist_options_t *options, int_t k, double thresh, float *BlockUFactor, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t *, int *info, SCT_t *)
#define MAX_LOOKAHEADS
Definition: superlu_sdefs.h:96
float * sready_x
Definition: psgstrs3d.c:150
int_t szSendLPanel(int_t k, int_t receiver, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void suser_free_dist(int_t, int_t)
Definition: smemory_dist.c:49
struct slsumBmod_buff_t slsumBmod_buff_t
int psgssvx3d_csc_batch(superlu_dist_options_t *, int batchCount, int m, int n, int nnz, int nrhs, handle_t *, float **RHSptr, int *ldRHS, float **ReqPtr, float **CeqPtr, int **RpivPtr, int **CpivPtr, DiagScale_t *DiagScale, handle_t *F, float **Xptr, int *ldX, float **Berrs, gridinfo3d_t *grid3d, SuperLUStat_t *stat, int *info)
Solve a batch of linear systems Ai * Xi = Bi with a direct method, computing the LU factorization of ea...
Definition: psgssvx3d_csc_batch.c:81
int_t sIRecv_LDiagBlock(int_t k0, float *L_blk_ptr, int_t size, int_t src, MPI_Request *, gridinfo_t *, SCT_t *, int)
int superlu_strsm(const char *sideRL, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *a, const int lda, float *b, const int ldb)
void sperform_row_permutation(superlu_dist_options_t *, fact_t Fact, sScalePermstruct_t *, sLUstruct_t *LUstruct, int_t m, int_t n, gridinfo_t *, SuperMatrix *A, SuperMatrix *GA, SuperLUStat_t *, int job, int Equil, int *rowequ, int *colequ, int *iinfo)
Definition: sssvx3dAux.c:465
void sDestroy_LU(int_t, gridinfo_t *, sLUstruct_t *)
Destroy distributed L & U matrices.
Definition: psutil.c:442
int_t slsumReducePrK(int_t k, float *x, float *lsum, float *recvbuf, int nrhs, sLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:4148
int psgsmv_AXglobal(int_t, int_t[], float[], int_t[], float[], float[])
Definition: psgsmv_AXglobal.c:259
void sprepare_multiGPU_buffers(int, int, int, int, int, int)
void psconvert_flatten_skyline2UROWDATA(superlu_dist_options_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, int n)
Definition: psgssvx.c:2330
int_t sUPanelTrSolve(int_t k, float *BlockLFactor, float *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, SCT_t *)
int psgsmv_AXglobal_setup(SuperMatrix *, Glu_persist_t *, gridinfo_t *, int_t *, int_t *[], float *[], int_t *[], int_t[])
void slsum_fmod_inv_master(float *, float *, float *, float *, int, int, int_t, int *fmod, int_t, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t, int_t, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:963
void sZero_CompRowLoc_Matrix_dist(SuperMatrix *)
Sets all entries of a matrix to zero: A_{i,j} = 0 for i,j = 1,...,n.
Definition: sutil_dist.c:378
void slsum_fmod(float *, float *, float *, float *, int, int, int_t, int *fmod, int_t, int_t, int_t, int_t *, gridinfo_t *, sLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: psgstrs_lsum.c:62
void sallocateA_dist(int_t, int_t, float **, int_t **, int_t **)
Definition: smemory_dist.c:147
void slsum_fmod_leaf_newsolve(strf3Dpartition_t *trf3Dpartition, float *lsum, float *x, float *xk, float *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, sLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:3704
float sdist_psymbtonum(superlu_dist_options_t *, int_t, SuperMatrix *, sScalePermstruct_t *, Pslu_freeable_t *, sLUstruct_t *, gridinfo_t *)
Definition: pssymbfact_distdata.c:1219
int_t sIrecv_UPanel(int_t k, int_t k0, int_t *Usub_buf, float *, sLocalLU_t *, gridinfo_t *, MPI_Request *, int)
void sgather_u(int_t num_u_blks, Ublock_info_t *Ublock_info, int_t *usub, float *uval, float *bigU, int_t ldu, int_t *xsup, int_t klst)
void sLUstructInit(const int_t, sLUstruct_t *)
Allocate storage in LUstruct.
Definition: psutil.c:408
int_t sgatherFactoredLU(int_t sender, int_t receiver, int_t nnodes, int_t *nodeList, sLUValSubBuf_t *LUvsb, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void sSolveFinalize(superlu_dist_options_t *, sSOLVEstruct_t *)
Release the resources used for the solution phase.
Definition: psutil.c:1196
void sDestroy_trf3Dpartition(strf3Dpartition_t *trf3Dpartition)
int sscal_(const int *n, const float *alpha, float *dx, const int *incx)
int spivot_batch(superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, float **ReqPtr, float **CeqPtr, DiagScale_t *, int **RpivPtr)
Compute row pivotings for each matrix, for numerical stability.
Definition: spivot_batch.c:45
int_t psgstrs_init_device_lsum_x(superlu_dist_options_t *, int_t, int_t, int_t, gridinfo_t *, sLUstruct_t *, sSOLVEstruct_t *, int *)
Definition: psutil.c:779
int_t sSchurComplementSetupGPU(int_t k, msgs_t *msgs, packLUInfo_t *, int_t *, int_t *, int_t *, gEtreeInfo_t *, factNodelists_t *, sscuBufs_t *, sLUValSubBuf_t *LUvsb, gridinfo_t *, sLUstruct_t *, HyP_t *)
void psCompute_Diag_Inv(int_t, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, int *)
Definition: psgstrs.c:663
int_t siBcastXk2Pck(int_t k, float *x, int nrhs, int **sendList, MPI_Request *send_req, sLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:6182
int_t snonLeafForestBackSolve3d(int_t treeId, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x, float *lsum, sxT_struct *xT_s, float *recvbuf, MPI_Request *send_req, int nrhs, slsumBmod_buff_t *lbmod_buf, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:4190
void sreadtriple_noheader(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadtriple_noheader.c:35
void sCompCol_to_CompRow_dist(int_t m, int_t n, int_t nnz, float *a, int_t *colptr, int_t *rowind, float **at, int_t **rowptr, int_t **colind)
int_t sTrs2_GatherU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, float *uval, float *tempv)
Definition: psgstrf2.c:757
void slaqgs_dist(SuperMatrix *, float *, float *, float, float, float, char *)
Definition: slaqgs_dist.c:93
int_t sblock_gemm_scatterBottomLeft(int_t lb, int_t j, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int *indirect, int *indirect2, HyP_t *HyP, sLUstruct_t *, gridinfo_t *, SCT_t *SCT, SuperLUStat_t *)
int file_Printfloat5(FILE *, char *, int_t, float *)
Definition: sutil_dist.c:626
void sCreate_Dense_Matrix_dist(SuperMatrix *, int_t, int_t, float *, int_t, Stype_t, Dtype_t, Mtype_t)
void sblock_gemm_scatter(int_t lb, int_t j, Ublock_info_t *Ublock_info, Remain_info_t *Remain_info, float *L_mat, int ldl, float *U_mat, int ldu, float *bigV, int_t knsupc, int_t klst, int_t *lsub, int_t *usub, int_t ldt, int_t thread_id, int *indirect, int *indirect2, int_t **Lrowind_bc_ptr, float **Lnzval_bc_ptr, int_t **Ufstnz_br_ptr, float **Unzval_br_ptr, int_t *xsup, gridinfo_t *, SuperLUStat_t *)
int screate_batch_systems(handle_t *SparseMatrix_handles, int batchCount, int nrhs, float **rhs, int *ldb, float **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
Definition: screate_matrix3d.c:301
int strsv_(char *, char *, char *, int *, float *, int *, float *, int *)
int_t slocalSolveXkYk(trtype_t trtype, int_t k, float *x, int nrhs, sLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: psgstrs3d.c:6133
void pxgstrs_finalize(pxgstrs_comm_t *)
Definition: util.c:318
int_t sLpanelUpdate(int_t off0, int_t nsupc, float *ublk_ptr, int_t ld_ujrow, float *lusup, int_t nsupr, SCT_t *)
int sldperm_dist(int, int, int_t, int_t[], int_t[], float[], int_t *, float[], float[])
Definition: sldperm_dist.c:96
int sLluBufFreeArr(int_t numLA, sLUValSubBuf_t **LUvsbs)
sLUValSubBuf_t ** sLluBufInitArr(int_t numLA, sLUstruct_t *LUstruct)
int_t sUPanelUpdate(int_t k, int *factored_U, MPI_Request *, float *BlockLFactor, float *bigV, int_t ldt, Ublock_info_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, SCT_t *)
float * sgetBigV(int_t, int_t)
int_t sscatter3dLPanels(int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int *supernodeMask)
int_t psReDistribute_B_to_X(float *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, float *x, sScalePermstruct_t *, Glu_persist_t *, gridinfo_t *, sSOLVEstruct_t *)
Definition: psgstrs.c:165
void sRgather_U(int_t k, int_t jj0, int_t *usub, float *uval, float *bigU, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno, int_t *perm_u)
int_t sleafForestForwardSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x, float *lsum, float *recvbuf, float *rtemp, MPI_Request *send_req, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1990
float psdistribute_allgrid_index_only(superlu_dist_options_t *options, int_t n, SuperMatrix *A, sScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, sLUstruct_t *LUstruct, gridinfo_t *grid, int *supernodeMask)
Definition: psdistribute.c:3423
void psgsrfs3d(superlu_dist_options_t *, int_t, SuperMatrix *, float, sLUstruct_t *, sScalePermstruct_t *, gridinfo3d_t *, strf3Dpartition_t *, float *, int_t, float *, int_t, int, sSOLVEstruct_t *, float *, SuperLUStat_t *, int *)
Definition: psgsrfs.c:364
void psgsmv_finalize(psgsmv_comm_t *)
Definition: psgsmv.c:371
int_t psReDistribute3d_B_to_X(float *B, int_t m_loc, int nrhs, int_t ldb, int_t fst_row, int_t *ilsum, float *x, sScalePermstruct_t *ScalePermstruct, Glu_persist_t *Glu_persist, gridinfo3d_t *grid3d, sSOLVEstruct_t *SOLVEstruct)
Definition: psgstrs3d.c:6269
int_t strs_X_gather3d(float *x, int nrhs, strf3Dpartition_t *trf3Dpartition, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:1552
int sp_strsv_dist(char *, char *, char *, SuperMatrix *, SuperMatrix *, float *, int *)
Definition: ssp_blas2_dist.c:95
void sLUstructFree(sLUstruct_t *)
Deallocate LUstruct.
Definition: psutil.c:422
int sgemm_(const char *, const char *, const int *, const int *, const int *, const float *, const float *, const int *, const float *, const int *, const float *, float *, const int *)
float sMaxAbsUij(int iam, int n, Glu_persist_t *, sLUstruct_t *, gridinfo_t *)
Find max(abs(U(i,j)))
Definition: sutil_dist.c:682
int_t sTrs2_ScatterU(int_t iukp, int_t rukp, int_t klst, int_t nsupc, int_t ldu, int_t *usub, float *uval, float *tempv)
Definition: psgstrf2.c:782
void sCopy_CompCol_Matrix_dist(SuperMatrix *, SuperMatrix *)
strf3Dpartition_t * sinitTrf3Dpartition_allgrid(int_t n, superlu_dist_options_t *options, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int screate_block_diag_3d(SuperMatrix *A, int batchCount, int nrhs, float **rhs, int *ldb, float **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d)
int sp_sgemv_dist(char *, float, SuperMatrix *, float *, int, float, float *, int)
SpGEMV.
Definition: ssp_blas2_dist.c:394
void slsum_bmod_inv(float *, float *, float *, float *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t *, int_t *, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:1364
void sallocScalePermstruct_RC(sScalePermstruct_t *, int_t m, int_t n)
Definition: sssvx3dAux.c:585
float pslangs(char *, SuperMatrix *, gridinfo_t *)
Definition: pslangs.c:65
void sfill_dist(float *, int_t, float)
Fills a single-precision (float) array with a given value.
Definition: sutil_dist.c:583
void slsum_bmod(float *, float *, float *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, sLocalLU_t *, MPI_Request[], SuperLUStat_t *)
Definition: psgstrs_lsum.c:246
int_t sbCastXk2Pck(int_t k, sxT_struct *xT_s, int nrhs, sLUstruct_t *LUstruct, gridinfo_t *grid, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:4118
int_t sRecv_UDiagBlock(int_t k0, float *ublk_ptr, int_t size, int_t src, gridinfo_t *, SCT_t *, int)
int_t sinitScuBufs(superlu_dist_options_t *, int_t ldt, int_t num_threads, int_t nsupers, sscuBufs_t *, sLUstruct_t *, gridinfo_t *)
int_t ssparseTreeFactor_ASYNC(sForest_t *sforest, commRequests_t **comReqss, sscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, sLUValSubBuf_t **LUvsbs, sdiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int_t ldt, HyP_t *HyP, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info)
void sgsequ_dist(SuperMatrix *, float *, float *, float *, float *, float *, int *)
Definition: sgsequ_dist.c:92
void slsum_fmod_leaf(int_t treeId, strf3Dpartition_t *trf3Dpartition, float *lsum, float *x, float *xk, float *rtemp, int nrhs, int knsupc, int_t k, int *fmod, int_t nlb, int_t lptr, int_t luptr, int_t *xsup, gridinfo_t *grid, sLocalLU_t *Llu, MPI_Request send_req[], SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:2180
int_t sSchurComplementSetup(int_t k, int *msgcnt, Ublock_info_t *, Remain_info_t *, uPanelInfo_t *, lPanelInfo_t *, int_t *, int_t *, int_t *, float *bigU, int_t *Lsub_buf, float *Lval_buf, int_t *Usub_buf, float *Uval_buf, gridinfo_t *, sLUstruct_t *)
void sRgather_L(int_t k, int_t *lsub, float *lusup, gEtreeInfo_t *, Glu_persist_t *, gridinfo_t *, HyP_t *, int_t *myIperm, int_t *iperm_c_supno)
int_t sleafForestBackSolve3d(superlu_dist_options_t *options, int_t treeId, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x, float *lsum, float *recvbuf, MPI_Request *send_req, int nrhs, slsumBmod_buff_t *lbmod_buf, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:4279
int strs_compute_communication_structure(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, int *supernodeMask, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: psgstrs3d.c:153
void sInit_HyP(superlu_dist_options_t *, HyP_t *HyP, sLocalLU_t *Llu, int_t mcb, int_t mrb)
struct sxT_struct sxT_struct
int psPermute_Dense_Matrix(int_t, int_t, int_t[], int_t[], float[], int, float[], int, int, gridinfo_t *)
Permute the distributed dense matrix: B <= perm(X). perm[i] = j means the i-th row of X is in the j-t...
Definition: psutil.c:297
void sscatter_u(int ib, int jb, int nsupc, int_t iukp, int_t *xsup, int klst, int nbrow, int_t lptr, int temp_nbrow, int_t *lsub, int_t *usub, float *tempv, int_t **Ufstnz_br_ptr, float **Unzval_br_ptr, gridinfo_t *grid)
void sScaleAdd_CompRowLoc_Matrix_dist(SuperMatrix *, SuperMatrix *, float)
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,...
Definition: sutil_dist.c:420
void sDestroy_Tree(int_t, gridinfo_t *, sLUstruct_t *)
Destroy broadcast and reduction trees used in triangular solve.
Definition: psutil.c:1312
int_t sLluBufInit(sLUValSubBuf_t *, sLUstruct_t *)
void slsum_fmod_inv_gpu_wrap(int, int, int, int, float *, float *, int, int, int_t, int *fmod, C_Tree *, C_Tree *, int_t *, int_t *, int64_t *, float *, int64_t *, float *, int64_t *, int_t *, int64_t *, int_t *, int *, gridinfo_t *, int_t, uint64_t *, uint64_t *, float *, float *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int)
void slsum_bmod_inv_master(float *, float *, float *, float *, int, int_t, int *bmod, int_t *, Ucb_indptr_t **, int_t **, int_t *, gridinfo_t *, sLocalLU_t *, SuperLUStat_t **, int_t, int_t, int, int)
Definition: psgstrs_lsum.c:1835
int sequil_batch(superlu_dist_options_t *, int batchCount, int m, int n, handle_t *, float **ReqPtr, float **CeqPtr, DiagScale_t *)
Equilibrate the systems using the LAPACK-style algorithm.
Definition: sequil_batch.c:43
int_t sISend_UDiagBlock(int_t k0, float *ublk_ptr, int_t size, MPI_Request *, gridinfo_t *, int)
void psgsequ(SuperMatrix *, float *, float *, float *, float *, float *, int *, gridinfo_t *)
Definition: psgsequ.c:87
int_t strs_B_init3d(int_t nsupers, float *x, int nrhs, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
Definition: psgstrs3d.c:32
int_t sBcast_UPanel(int_t k, int_t k0, int_t *usub, float *uval, gridinfo_t *, int *msgcnt, int *ToSendD, SCT_t *, int)
int_t szSendUPanel(int_t k, int_t receiver, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void strtri_(char *, char *, int *, float *, int *, int *)
int_t checkRecvLDiag(int_t k, commRequests_t *comReqs, gridinfo_t *, SCT_t *)
Definition: treeFactorization.c:226
void sScaleAddId_CompRowLoc_Matrix_dist(SuperMatrix *, float)
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,...
Definition: sutil_dist.c:395
int_t sWaitL(int_t k, int *msgcnt, int *msgcntU, MPI_Request *, MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *)
float slangs_dist(char *, SuperMatrix *)
Definition: slangs_dist.c:72
void sCopy_Dense_Matrix_dist(int_t, int_t, float *, int_t, float *, int_t)
int_t sReDistribute_A(SuperMatrix *A, sScalePermstruct_t *ScalePermstruct, Glu_freeable_t *Glu_freeable, int_t *xsup, int_t *supno, gridinfo_t *grid, int_t *colptr[], int_t *rowind[], float *a[])
Definition: psdistribute.c:67
int_t sWaitU(int_t k, int *msgcnt, MPI_Request *, MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *)
int_t sQuerySpace_dist(int_t, sLUstruct_t *, gridinfo_t *, SuperLUStat_t *, superlu_dist_mem_usage_t *)
Definition: smemory_dist.c:73
int_t sinit3DLUstruct(int_t *myTreeIdxs, int_t *myZeroTrIdxs, int_t *nodeCount, int_t **nodeList, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int sp_sgemm_dist(char *, int, float, SuperMatrix *, float *, int, float, float *, int)
Definition: ssp_blas3_dist.c:126
int screate_matrix3d(SuperMatrix *A, int nrhs, float **rhs, int *ldb, float **x, int *ldx, FILE *fp, gridinfo3d_t *grid3d)
int_t scollect3dLpanels(int_t layer, int_t nsupers, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d)
int_t psgsTrBackSolve3d(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *x3d, float *lsum3d, sxT_struct *xT_s, float *recvbuf, MPI_Request *send_req, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, xtrsTimer_t *xtrsTimer)
Definition: psgstrs3d.c:7564
float * floatMalloc_dist(int_t)
Definition: smemory_dist.c:155
void sgather_l(int_t num_LBlk, int_t knsupc, Remain_info_t *L_info, float *lval, int_t LD_lval, float *L_buff)
void sPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *)
Print the blocks in the factored matrix U.
Definition: sutil_dist.c:914
void sFillRHS_dist(char *, int_t, float *, int_t, SuperMatrix *, float *, int_t)
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's.
Definition: sutil_dist.c:570
void sZeroUblocks(int iam, int n, gridinfo_t *, sLUstruct_t *)
Sets all entries of matrix U to zero.
Definition: sutil_dist.c:955
void psgstrs2_omp(int_t k0, int_t k, Glu_persist_t *, gridinfo_t *, sLocalLU_t *, Ublock_info_t *, SuperLUStat_t *)
Definition: psgstrf2.c:850
int psflatten_LDATA(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat)
Definition: psgssvx.c:2520
int_t sBcast_LPanel(int_t k, int_t k0, int_t *lsub, float *lusup, gridinfo_t *, int *msgcnt, int **ToSendR, int_t *xsup, SCT_t *, int)
int_t sUDiagBlockRecvWait(int_t k, int *IrecvPlcd_D, int *factored_L, MPI_Request *, gridinfo_t *, sLUstruct_t *, SCT_t *)
void psgsmv_init(SuperMatrix *, int_t *, gridinfo_t *, psgsmv_comm_t *)
Definition: psgsmv.c:27
int sScatter_B3d(NRformat_loc3d *A3d, gridinfo3d_t *grid3d)
void psgsmv(int_t, SuperMatrix *, gridinfo_t *, psgsmv_comm_t *, float x[], float ax[])
Definition: psgsmv.c:235
void psgstrs(superlu_dist_options_t *, int_t, sLUstruct_t *, sScalePermstruct_t *, gridinfo_t *, float *, int_t, int_t, int_t, int, sSOLVEstruct_t *, SuperLUStat_t *, int *)
Definition: psgstrs.c:860
int_t sgatherAllFactoredLUFr(int_t *myZeroTrIdxs, sForest_t *sForests, sLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT)
void sdelete_multiGPU_buffers()
int screate_matrix_dat(SuperMatrix *, int, float **, int *, float **, int *, FILE *, gridinfo_t *)
int superlu_sger(const int m, const int n, const float alpha, const float *x, const int incx, const float *y, const int incy, float *a, const int lda)
void psconvertUROWDATA2skyline(superlu_dist_options_t *, gridinfo_t *, sLUstruct_t *, SuperLUStat_t *, int n)
Definition: psgssvx.c:2304
void psgstrs3d_newsolve(superlu_dist_options_t *options, int_t n, sLUstruct_t *LUstruct, sScalePermstruct_t *ScalePermstruct, strf3Dpartition_t *trf3Dpartition, gridinfo3d_t *grid3d, float *B, int_t m_loc, int_t fst_row, int_t ldb, int nrhs, sSOLVEstruct_t *SOLVEstruct, SuperLUStat_t *stat, int *info)
Definition: psgstrs3d.c:6936
void sreadMM_dist(FILE *, int_t *, int_t *, int_t *, float **, int_t **, int_t **)
Definition: sreadMM.c:38
Mtype_t
Definition: supermatrix.h:42
Dtype_t
Definition: supermatrix.h:35
Stype_t
Definition: supermatrix.h:22
int j
Definition: sutil_dist.c:287