SuperLU Distributed 9.0.0
gpu3d
lupanels.hpp
Go to the documentation of this file.
1#pragma once
2#include <vector>
3#include <iostream>
4#include "superlu_ddefs.h"
5#include "lu_common.hpp"
6
7#ifdef HAVE_CUDA
8#include "lupanels_GPU.cuh"
9#include "gpuCommon.hpp"
10#endif
11
12#include "commWrapper.hpp"
13#include "anc25d.hpp"
14//class lpanelGPU_t;
15//class upanelGPU_t;
16#define GLOBAL_BLOCK_NOT_FOUND -1
17// it can be templatized for double and complex double
19{
20public:
22 double *val;
23 // ifdef GPU acceraleration
24
25#ifdef HAVE_CUDA
26 lpanelGPU_t gpuPanel;
27#endif
28 // bool isDiagIncluded;
29
30 lpanel_t(int_t k, int_t *lsub, double *nzval, int_t *xsup, int_t isDiagIncluded);
31
32 // default constuctor
33#ifdef HAVE_CUDA
34 lpanel_t() : gpuPanel(NULL, NULL)
35 {
36 index = NULL;
37 val = NULL;
38 }
39#else
41 {
42 index = NULL;
43 val = NULL;
44 }
45#endif
46
47 lpanel_t(int_t *index_, double *val_) : index(index_), val(val_) { return; };
48
49 // index[0] is number of blocks
51 {
52 return index[0];
53 }
54 // number of rows
55 int_t nzrows() { return index[1]; }
56 int_t haveDiag() { return index[2]; }
57 int_t ncols() { return index[3]; }
58
59 // global block id of k-th block in the panel
61 {
62 return index[LPANEL_HEADER_SIZE + k];
63 }
64
65 // number of rows in the k-th block
67 {
68 return index[LPANEL_HEADER_SIZE + nblocks() + k + 1] - index[LPANEL_HEADER_SIZE + nblocks() + k];
69 }
70
71 //
72 int_t stRow(int k)
73 {
74 return index[LPANEL_HEADER_SIZE + nblocks() + k];
75 }
76 // row
78 {
79 // LPANEL_HEADER
80 // nblocks() : blocks list
81 // nblocks()+1 : blocks st_points
82 // index[LPANEL_HEADER_SIZE + nblocks() + k] statrting of the block
84 2 * nblocks() + 1 + index[LPANEL_HEADER_SIZE + nblocks() + k]];
85 }
86
87 double *blkPtr(int_t k)
88 {
89 return &val[index[LPANEL_HEADER_SIZE + nblocks() + k]];
90 }
91
93 {
94 return index[LPANEL_HEADER_SIZE + nblocks() + k];
95 }
96
97 int_t LDA() { return index[1]; }
98 int_t find(int_t k);
99 // for L panel I don't need any special transformation function
100 int_t panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD);
101 int_t diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup,
102 superlu_dist_options_t *options, SuperLUStat_t *stat, int *info);
103 int_t packDiagBlock(double *DiagLBlk, int_t LDD);
104 int_t isEmpty() { return index == NULL; }
106 {
107 if (index == NULL)
108 return 0;
109 return ncols() * nzrows();
110 }
111
113 {
114 if (index == NULL)
115 return 0;
116 return LPANEL_HEADER_SIZE + 2 * nblocks() + 1 + nzrows();
117 }
118
119 size_t totalSize()
120 {
121 return sizeof(int_t)*indexSize() + sizeof(double)*nzvalSize();
122 }
123
124 // return the maximal iEnd such that stRow(iEnd)-stRow(iSt) < maxRow;
125 int getEndBlock(int iSt, int maxRows);
126
127 // ~lpanel_t()
128 // {
129 // SUPERLU_FREE(index);
130 // // SUPERLU_FREE(val);
131 // }
132
133#ifdef HAVE_CUDA
134 lpanelGPU_t copyToGPU();
135 lpanelGPU_t copyToGPU(void *basePtr); // when we are doing a single allocation
136 int checkGPU();
137 int copyBackToGPU();
138
139 int_t panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream,
140 int_t ksupsz,
141 double *DiagBlk, // device pointer
142 int_t LDD);
143
144 int_t diagFactorPackDiagBlockGPU(int_t k,
145 double *UBlk, int_t LDU, // CPU pointers
146 double *DiagLBlk, int_t LDD, // CPU pointers
147 double thresh, int_t *xsup,
148 superlu_dist_options_t *options,
149 SuperLUStat_t *stat, int *info);
150 int_t diagFactorCuSolver(int_t k,
151 cusolverDnHandle_t cusolverH, cudaStream_t cuStream,
152 double *dWork, int* dInfo, // GPU pointers
153 double *dDiagBuf, int_t LDD, // GPU pointers
154 double thresh, int_t *xsup,
155 superlu_dist_options_t *options,
156 SuperLUStat_t *stat, int *info);
157
158 double *blkPtrGPU(int k)
159 {
160 return &gpuPanel.val[blkPtrOffset(k)];
161 }
162
163 lpanel_t(int_t *index_, double *val_, int_t *indexGPU, double *valGPU) : index(index_), val(val_), gpuPanel(indexGPU, valGPU)
164 {
165 return;
166 };
167 int_t copyFromGPU();
168#endif
169};
170
172{
173public:
175 double *val;
176#ifdef HAVE_CUDA
177 // upanelGPU_t* upanelGPU;
178 upanelGPU_t gpuPanel;
179#endif
180
181 // upanel_t(int_t *usub, double *uval);
182 upanel_t(int_t k, int_t *usub, double *uval, int_t *xsup);
183#ifdef HAVE_CUDA
184 upanel_t() : gpuPanel(NULL, NULL)
185 {
186 index = NULL;
187 val = NULL;
188 }
189#else
191 {
192 index = NULL;
193 val = NULL;
194 }
195#endif
196 // constructing from recevied index and val
197 upanel_t(int_t *index_, double *val_) : index(index_), val(val_) { return; };
198 // index[0] is number of blocks
200 {
201 return index[0];
202 }
203 // number of rows
205 if (index == NULL) return 0; /* Sherry added this check. 2/22/2023 */
206 return index[1];
207 }
208 int_t LDA() { return index[2]; } // is also supersize of that coloumn
209
210 // global block id of k-th block in the panel
212 {
213 return index[UPANEL_HEADER_SIZE + k];
214 }
215
216 // number of rows in the k-th block
218 {
219 return index[UPANEL_HEADER_SIZE + nblocks() + k + 1] - index[UPANEL_HEADER_SIZE + nblocks() + k];
220 }
221 // row
223 {
224 // UPANEL_HEADER
225 // nblocks() : blocks list
226 // nblocks()+1 : blocks st_points
227 // index[UPANEL_HEADER_SIZE + nblocks() + k] statrting of the block
228 return &index[UPANEL_HEADER_SIZE +
229 2 * nblocks() + 1 + index[UPANEL_HEADER_SIZE + nblocks() + k]];
230 }
231
232 double *blkPtr(int_t k)
233 {
234 return &val[LDA() * index[UPANEL_HEADER_SIZE + nblocks() + k]];
235 }
236
238 {
239 return LDA() * index[UPANEL_HEADER_SIZE + nblocks() + k];
240 }
241 // for U panel
242 // int_t packed2skyline(int_t* usub, double* uval );
243 int_t packed2skyline(int_t k, int_t *usub, double *uval, int_t *xsup);
244 int_t panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD);
245 int_t diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup,
246 superlu_dist_options_t *options,
247 SuperLUStat_t *stat, int *info);
248
249 // double* blkPtr(int_t k);
250 // int_t LDA();
251 int_t find(int_t k);
252 int_t isEmpty() { return index == NULL; }
254 {
255 if (index == NULL)
256 return 0;
257 return LDA() * nzcols();
258 }
259
261 {
262 if (index == NULL)
263 return 0;
264 return UPANEL_HEADER_SIZE + 2 * nblocks() + 1 + nzcols();
265 }
266 size_t totalSize()
267 {
268 return sizeof(int_t)*indexSize() + sizeof(double)*nzvalSize();
269 }
271 {
272 if (index == NULL)
273 {
274 std::cout << "## Warning: Empty Panel"
275 << "\n";
276 return 0;
277 }
278 int_t alternateNzcols = index[UPANEL_HEADER_SIZE + 2 * nblocks()];
279 // std::cout<<nblocks()<<" nzcols "<<nzcols()<<" alternate nzcols "<< alternateNzcols << "\n";
280 if (nzcols() != alternateNzcols)
281 {
282 printf("Error 175\n");
283 exit(-1);
284 }
285
286 return UPANEL_HEADER_SIZE + 2 * nblocks() + 1 + nzcols();
287 }
288
290 {
291 return index[UPANEL_HEADER_SIZE + nblocks() + k];
292 }
293 int getEndBlock(int jSt, int maxCols);
294
295 // ~upanel_t()
296 // {
297 // SUPERLU_FREE(index);
298 // SUPERLU_FREE(val);
299 // }
300
301
302#ifdef HAVE_CUDA
303 upanelGPU_t copyToGPU();
304 //TODO: implement with baseptr
305 upanelGPU_t copyToGPU(void *basePtr);
306 int copyBackToGPU();
307
308 int_t panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream,
309 int_t ksupsz, double *DiagBlk, int_t LDD);
310 int checkGPU();
311
312 double *blkPtrGPU(int k)
313 {
314 return &gpuPanel.val[blkPtrOffset(k)];
315 }
316
317 upanel_t(int_t *index_, double *val_, int_t *indexGPU, double *valGPU) : index(index_), val(val_), gpuPanel(indexGPU, valGPU)
318 {
319 return;
320 };
321 int_t copyFromGPU();
322#endif
323};
324
325// Defineing GPU data types
326//lapenGPU_t has exact same structure has lapanel_t but
327// the pointers are on GPU
328
330{
338 // variables for scattering ldt*THREAD_Size
341 double *bigV; // size = THREAD_Size*ldt*ldt
343 double thresh;
344 int *info;
345 //TODO: get it from environment
346 int numDiagBufs = 32; /* Sherry: not fixed yet */
347
348 // Add SCT_t here
352
353
354 // Adding more variables for factorization
357 int maxLeafNodes; /* Sherry added 12/31/22. Computed in LUstruct_v100 constructor */
358
359 ddiagFactBufs_t **dFBufs; /* stores L and U diagonal blocks */
361 // myNodeCount,
362 // treePerm
363 // myZeroTrIdxs
364 // sForests
365 // myTreeIdxs
366 // gEtreeInfo
367
368 // buffers for communication
373 std::vector<double *> diagFactBufs; /* stores diagonal blocks,
374 each one is a normal dense matrix.
375 Sherry: where are they free'd ?? */
376 std::vector<double *> LvalRecvBufs;
377 std::vector<double *> UvalRecvBufs;
378 std::vector<int_t *> LidxRecvBufs;
379 std::vector<int_t *> UidxRecvBufs;
380
381 // send and recv count for 2d comm
382 std::vector<int_t> LvalSendCounts;
383 std::vector<int_t> UvalSendCounts;
384 std::vector<int_t> LidxSendCounts;
385 std::vector<int_t> UidxSendCounts;
386
387 //
388 std::vector<bcastStruct> bcastDiagRow;
389 std::vector<bcastStruct> bcastDiagCol;
390 std::vector<bcastStruct> bcastLval;
391 std::vector<bcastStruct> bcastUval;
392 std::vector<bcastStruct> bcastLidx;
393 std::vector<bcastStruct> bcastUidx;
394
395 int_t krow(int_t k) { return k % Pr; }
396 int_t kcol(int_t k) { return k % Pc; }
397 int_t procIJ(int_t i, int_t j) { return PNUM(krow(i), kcol(j), grid); }
398 int_t supersize(int_t k) { return xsup[k + 1] - xsup[k]; }
399 int_t g2lRow(int_t k) { return k / Pr; }
400 int_t g2lCol(int_t k) { return k / Pc; }
401
403
404 // For GPU acceleration
405 LUstructGPU_t *dA_gpu; // pointing to memory on GPU
406 LUstructGPU_t A_gpu; // pointing to memory accessible on CPU
407
409 // Intermediate for flat batched
413
418
421
423 {
425 COL_MAP
426 };
427
432 dLUstruct_t *LUstruct, gridinfo3d_t *grid3d,
434 double thresh_, int *info_);
435
437 {
438
439 /* Yang: Deallocate the lPanelVec[i] and uPanelVec[i] here instead of using destructors ~lpanel_t or ~upanel_t,
440 as lpanel_t/upanel_t is used for holding temporary communication buffers as well. Note that lPanelVec[i].val is not deallocated here as it's pointing to the L data in the C code*/
441
442 for (int_t i = 0; i < CEILING(nsupers, Pc); ++i)
443 if (i * Pc + mycol < nsupers && isNodeInMyGrid[i * Pc + mycol] == 1){
444 if(lPanelVec[i].index)
445 SUPERLU_FREE(lPanelVec[i].index);
446 // SUPERLU_FREE(lPanelVec[i].val);
447 }
448
449 for (int_t i = 0; i < CEILING(nsupers, Pr); ++i)
450 if (i * Pr + myrow < nsupers && isNodeInMyGrid[i * Pr + myrow] == 1){
451 if(uPanelVec[i].index)
452 SUPERLU_FREE(uPanelVec[i].index);
453 if(uPanelVec[i].val)
455 }
456
457 delete[] lPanelVec;
458 delete[] uPanelVec;
459
460
461 /* free diagonal L and U blocks */
463
468
469 int i;
470 for (i = 0; i < options->num_lookaheads; i++) {
475 }
476
477 for (i = 0; i < numDiagBufs; i++) SUPERLU_FREE(diagFactBufs[i]);
478
479 /* Sherry added the following, which comes from batch setup */
480 superlu_acc_offload = sp_ienv_dist(10, options); //get_acc_offload();
482 //printf(".. free batch buffers\n"); fflush(stdout);
483 SUPERLU_FREE(A_gpu.dFBufs);
484 SUPERLU_FREE(A_gpu.gpuGemmBuffs);
485
486 for (int stream = 0; stream < A_gpu.numCudaStreams; stream++)
487 {
488 cusolverDnDestroy(A_gpu.cuSolveHandles[stream]);
489 cublasDestroy(A_gpu.cuHandles[stream]);
490 cublasDestroy(A_gpu.lookAheadLHandle[stream]);
491 cublasDestroy(A_gpu.lookAheadUHandle[stream]);
492 }
493
494
495 }
496
498
499 } /* end destructor LUstruct_v100 */
500
501
507 int_t *computeIndirectMap(indirectMapType direction, int_t srcLen, int_t *srcVec,
508 int_t dstLen, int_t *dstVec);
509
511 int_t gi, int_t gj,
512 double *V, int_t ldv,
513 int_t *srcRowList, int_t *srcColList);
514
516 int_t k, int_t laIdx, lpanel_t &lpanel, upanel_t &upanel);
518 int_t ii, int_t jj, lpanel_t &lpanel, upanel_t &upanel);
520 int_t k, int_t ex, // suypernodes to be excluded
521 lpanel_t &lpanel, upanel_t &upanel);
522
524 sForest_t *sforest,
525 ddiagFactBufs_t **dFBufs, // size maxEtree level
526 gEtreeInfo_t *gEtreeInfo, // global etree info
527 int tag_ub);
528
530 sForest_t *sforest,
531 ddiagFactBufs_t **dFBufs, // size maxEtree level
532 gEtreeInfo_t *gEtreeInfo, // global etree info
533 int tag_ub);
534
535 // Helper routine to marshall batch LU data into the device data in A_gpu
536 void marshallBatchedLUData(int k_st, int k_end, int_t *perm_c_supno);
537 void marshallBatchedBufferCopyData(int k_st, int k_end, int_t *perm_c_supno);
538 void marshallBatchedTRSMUData(int k_st, int k_end, int_t *perm_c_supno);
539 void marshallBatchedTRSMLData(int k_st, int k_end, int_t *perm_c_supno);
540 void marshallBatchedSCUData(int k_st, int k_end, int_t *perm_c_supno);
541 void initSCUMarshallData(int k_st, int k_end, int_t *perm_c_supno);
542 int marshallSCUBatchedDataInner(int k_st, int k_end, int_t *perm_c_supno);
543 int marshallSCUBatchedDataOuter(int k_st, int k_end, int_t *perm_c_supno);
544 void dFactBatchSolve(int k_st, int k_end, int_t *perm_c_supno);
545
546 //
548 int_t dPanelBcast(int_t k, int_t offset);
550 sForest_t *sforest,
551 ddiagFactBufs_t **dFBufs, // size maxEtree level
552 gEtreeInfo_t *gEtreeInfo, // global etree info
553 int tag_ub);
554
556
557 int_t ancestorReduction3d(int_t ilvl, int_t *myNodeCount,
558 int_t **treePerm);
559
560 int_t zSendLPanel(int_t k0, int_t receiverGrid);
561 int_t zRecvLPanel(int_t k0, int_t senderGrid, double alpha, double beta);
562 int_t zSendUPanel(int_t k0, int_t receiverGrid);
563 int_t zRecvUPanel(int_t k0, int_t senderGrid, double alpha, double beta);
564
566 int_t alvl,
567 sForest_t *sforest,
568 ddiagFactBufs_t **dFBufs, // size maxEtree level
569 gEtreeInfo_t *gEtreeInfo, // global etree info
570 int tag_ub);
571
572
573
574 // GPU related functions
575#ifdef HAVE_CUDA
576 int_t setLUstruct_GPU();
577 int_t dsparseTreeFactorGPU(
578 sForest_t *sforest,
579 ddiagFactBufs_t **dFBufs, // size maxEtree level
580 gEtreeInfo_t *gEtreeInfo, // global etree info
581 int tag_ub);
582 int_t dsparseTreeFactorGPUBaseline(
583 sForest_t *sforest,
584 ddiagFactBufs_t **dFBufs, // size maxEtree level
585 gEtreeInfo_t *gEtreeInfo, // global etree info
586 int tag_ub);
587
588 int_t dAncestorFactorBaselineGPU(
589 int_t alvl,
590 sForest_t *sforest,
591 ddiagFactBufs_t **dFBufs, // size maxEtree level
592 gEtreeInfo_t *gEtreeInfo, // global etree info
593 int tag_ub);
594
595 int_t dSchurComplementUpdateGPU(
596 int streamId,
597 int_t k, lpanel_t &lpanel, upanel_t &upanel);
598 int_t dSchurCompUpdatePartGPU(
599 int_t iSt, int_t iEnd, int_t jSt, int_t jEnd,
600 int_t k, lpanel_t &lpanel, upanel_t &upanel,
601 cublasHandle_t handle, cudaStream_t cuStream,
602 double *gemmBuff);
603 int_t lookAheadUpdateGPU(
604 int streamId,
605 int_t k, int_t laIdx, lpanel_t &lpanel, upanel_t &upanel);
606 int_t dSchurCompUpLimitedMem(
607 int streamId,
608 int_t lStart, int_t lEnd,
609 int_t uStart, int_t uEnd,
610 int_t k, lpanel_t &lpanel, upanel_t &upanel);
611 int_t dSchurCompUpdateExcludeOneGPU(
612 int streamId,
613 int_t k, int_t ex, // suypernodes to be excluded
614 lpanel_t &lpanel, upanel_t &upanel);
615
616 int_t dDiagFactorPanelSolveGPU(int_t k, int_t offset, ddiagFactBufs_t **dFBufs);
617 int_t dPanelBcastGPU(int_t k, int_t offset);
618
619 int_t ancestorReduction3dGPU(int_t ilvl, int_t *myNodeCount,
620 int_t **treePerm);
621 int_t zSendLPanelGPU(int_t k0, int_t receiverGrid);
622 int_t zRecvLPanelGPU(int_t k0, int_t senderGrid, double alpha, double beta);
623 int_t zSendUPanelGPU(int_t k0, int_t receiverGrid);
624 int_t zRecvUPanelGPU(int_t k0, int_t senderGrid, double alpha, double beta);
625 int_t copyLUGPUtoHost();
626 int_t copyLUHosttoGPU();
627 int_t checkGPU();
628
629 // some more helper functions
630 upanel_t getKUpanel(int_t k, int_t offset);
631 lpanel_t getKLpanel(int_t k, int_t offset);
632 int_t SyncLookAheadUpdate(int streamId);
633
634 double *gpuLvalBasePtr, *gpuUvalBasePtr;
635 int_t *gpuLidxBasePtr, *gpuUidxBasePtr;
636 size_t gpuLvalSize, gpuUvalSize, gpuLidxSize, gpuUidxSize;
637
638 lpanelGPU_t* copyLpanelsToGPU();
639 upanelGPU_t* copyUpanelsToGPU();
640
641 // to perform diagFactOn GPU
642 int_t dDFactPSolveGPU(int_t k, int_t offset, ddiagFactBufs_t **dFBufs);
643 int_t dDFactPSolveGPU(int_t k, int_t handle_offset, int buffer_offset, ddiagFactBufs_t **dFBufs);
644#endif
645};
646
Definition: lupanels.hpp:19
int_t isEmpty()
Definition: lupanels.hpp:104
lpanel_t()
Definition: lupanels.hpp:40
int_t gid(int_t k)
Definition: lupanels.hpp:60
int_t LDA()
Definition: lupanels.hpp:97
int_t nbrow(int_t k)
Definition: lupanels.hpp:66
int_t * rowList(int_t k)
Definition: lupanels.hpp:77
size_t blkPtrOffset(int_t k)
Definition: lupanels.hpp:92
int_t nblocks()
Definition: lupanels.hpp:50
int_t haveDiag()
Definition: lupanels.hpp:56
int_t nzrows()
Definition: lupanels.hpp:55
int_t panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD)
Definition: l_panels.cpp:60
int_t nzvalSize()
Definition: lupanels.hpp:105
int_t find(int_t k)
Definition: l_panels.cpp:49
int_t packDiagBlock(double *DiagLBlk, int_t LDD)
Definition: l_panels.cpp:88
size_t totalSize()
Definition: lupanels.hpp:119
double * val
Definition: lupanels.hpp:22
int getEndBlock(int iSt, int maxRows)
Definition: l_panels.cpp:100
int_t indexSize()
Definition: lupanels.hpp:112
double * blkPtr(int_t k)
Definition: lupanels.hpp:87
int_t * index
Definition: lupanels.hpp:21
int_t stRow(int k)
Definition: lupanels.hpp:72
int_t diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
Definition: l_panels.cpp:78
lpanel_t(int_t *index_, double *val_)
Definition: lupanels.hpp:47
int_t ncols()
Definition: lupanels.hpp:57
Definition: lupanels.hpp:172
int_t LDA()
Definition: lupanels.hpp:208
int_t nbcol(int_t k)
Definition: lupanels.hpp:217
int_t * index
Definition: lupanels.hpp:174
upanel_t(int_t *index_, double *val_)
Definition: lupanels.hpp:197
double * blkPtr(int_t k)
Definition: lupanels.hpp:232
int_t nblocks()
Definition: lupanels.hpp:199
int_t indexSize()
Definition: lupanels.hpp:260
int_t packed2skyline(int_t k, int_t *usub, double *uval, int_t *xsup)
Definition: u_panels.cpp:74
int_t stCol(int k)
Definition: lupanels.hpp:289
int_t nzvalSize()
Definition: lupanels.hpp:253
size_t totalSize()
Definition: lupanels.hpp:266
upanel_t()
Definition: lupanels.hpp:190
int_t nzcols()
Definition: lupanels.hpp:204
int_t isEmpty()
Definition: lupanels.hpp:252
int_t gid(int_t k)
Definition: lupanels.hpp:211
int_t find(int_t k)
Definition: u_panels.cpp:110
int_t diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info)
int_t * colList(int_t k)
Definition: lupanels.hpp:222
int getEndBlock(int jSt, int maxCols)
Definition: u_panels.cpp:131
int_t panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD)
Definition: u_panels.cpp:121
int_t checkCorrectness()
Definition: lupanels.hpp:270
size_t blkPtrOffset(int_t k)
Definition: lupanels.hpp:237
double * val
Definition: lupanels.hpp:175
#define UPANEL_HEADER_SIZE
Definition: lu_common.hpp:7
#define LPANEL_HEADER_SIZE
Definition: lu_common.hpp:6
integer, parameter, public lsub
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
Definition: lupanels.hpp:330
std::vector< bcastStruct > bcastLval
Definition: lupanels.hpp:390
int numDiagBufs
Definition: lupanels.hpp:346
int_t supersize(int_t k)
Definition: lupanels.hpp:398
int_t myrow
Definition: lupanels.hpp:335
std::vector< int_t > UidxSendCounts
Definition: lupanels.hpp:385
std::vector< bcastStruct > bcastLidx
Definition: lupanels.hpp:392
void dFactBatchSolve(int k_st, int k_end, int_t *perm_c_supno)
int_t packedU2skyline(dLUstruct_t *LUstruct)
Definition: lupanels.cpp:379
void marshallBatchedBufferCopyData(int k_st, int k_end, int_t *perm_c_supno)
int_t * indirect
Definition: lupanels.hpp:340
void marshallBatchedTRSMUData(int k_st, int k_end, int_t *perm_c_supno)
std::vector< int_t * > UidxRecvBufs
Definition: lupanels.hpp:379
int marshallSCUBatchedDataOuter(int k_st, int k_end, int_t *perm_c_supno)
int * isNodeInMyGrid
Definition: lupanels.hpp:342
int_t dSchurCompUpdateExcludeOne(int_t k, int_t ex, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:476
int_t zRecvUPanel(int_t k0, int_t senderGrid, double alpha, double beta)
Definition: lupanels_comm3d.cpp:114
int_t kcol(int_t k)
Definition: lupanels.hpp:396
LUstructGPU_t * dA_gpu
Definition: lupanels.hpp:405
int64_t total_start_size
Definition: lupanels.hpp:417
int nThreads
Definition: lupanels.hpp:339
int_t dsparseTreeFactorBaseline(sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked.cpp:216
int_t dDiagFactorPanelSolve(int_t k, int_t offset, ddiagFactBufs_t **dFBufs)
Definition: lupanels.cpp:505
void marshallBatchedTRSMLData(int k_st, int k_end, int_t *perm_c_supno)
int maxLeafNodes
Definition: lupanels.hpp:357
int ** d_lblock_gid_ptrs
Definition: lupanels.hpp:414
void initSCUMarshallData(int k_st, int k_end, int_t *perm_c_supno)
SCT_t * SCT
Definition: lupanels.hpp:349
SuperLUStat_t * stat
Definition: lupanels.hpp:351
std::vector< double * > UvalRecvBufs
Definition: lupanels.hpp:377
int_t ldt
Definition: lupanels.hpp:335
int * d_lblock_start_dat
Definition: lupanels.hpp:415
int_t ancestorReduction3d(int_t ilvl, int_t *myNodeCount, int_t **treePerm)
Definition: lupanels_comm3d.cpp:6
std::vector< bcastStruct > bcastDiagRow
Definition: lupanels.hpp:388
upanel_t * uPanelVec
Definition: lupanels.hpp:332
int_t zSendUPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d.cpp:97
std::vector< bcastStruct > bcastDiagCol
Definition: lupanels.hpp:389
ddiagFactBufs_t ** dFBufs
Definition: lupanels.hpp:359
int64_t total_l_blocks
Definition: lupanels.hpp:417
std::vector< int_t * > LidxRecvBufs
Definition: lupanels.hpp:378
int_t * indirectRow
Definition: lupanels.hpp:340
std::vector< int_t > UvalSendCounts
Definition: lupanels.hpp:383
LUstruct_v100(int_t nsupers, int_t ldt_, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT_, superlu_dist_options_t *options_, SuperLUStat_t *stat, double thresh_, int *info_)
Definition: lupanels.cpp:44
int superlu_acc_offload
Definition: lupanels.hpp:360
int ** d_lblock_start_ptrs
Definition: lupanels.hpp:415
double * bigV
Definition: lupanels.hpp:341
int dsparseTreeFactorBatchGPU(sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
int_t krow(int_t k)
Definition: lupanels.hpp:395
int_t zSendLPanel(int_t k0, int_t receiverGrid)
Definition: lupanels_comm3d.cpp:59
int_t blockUpdate(int_t k, int_t ii, int_t jj, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:446
int marshallSCUBatchedDataInner(int k_st, int k_end, int_t *perm_c_supno)
void computeLBlockData()
superlu_dist_options_t * options
Definition: lupanels.hpp:350
int_t pdgstrf3d()
Definition: pdgstrf3d_upacked.cpp:211
std::vector< double * > LvalRecvBufs
Definition: lupanels.hpp:376
int_t procIJ(int_t i, int_t j)
Definition: lupanels.hpp:397
int_t dsparseTreeFactor(sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: dsparseTreeFactor_upacked.cpp:5
int_t * xsup
Definition: lupanels.hpp:336
anc25d_t anc25d
Definition: lupanels.hpp:402
int_t maxLvalCount
Definition: lupanels.hpp:369
int_t maxUidxCount
Definition: lupanels.hpp:372
int_t dPanelBcast(int_t k, int_t offset)
Definition: lupanels.cpp:535
indirectMapType
Definition: lupanels.hpp:423
@ ROW_MAP
Definition: lupanels.hpp:424
@ COL_MAP
Definition: lupanels.hpp:425
std::vector< int_t > LvalSendCounts
Definition: lupanels.hpp:382
int_t g2lCol(int_t k)
Definition: lupanels.hpp:400
double thresh
Definition: lupanels.hpp:343
dLocalLU_t * host_Llu
Definition: lupanels.hpp:411
~LUstruct_v100()
Definition: lupanels.hpp:436
int_t * indirectCol
Definition: lupanels.hpp:340
dLocalLU_t d_localLU
Definition: lupanels.hpp:412
int_t maxUvalCount
Definition: lupanels.hpp:371
void marshallBatchedSCUData(int k_st, int k_end, int_t *perm_c_supno)
void marshallBatchedLUData(int k_st, int k_end, int_t *perm_c_supno)
lpanel_t * lPanelVec
Definition: lupanels.hpp:331
int_t mycol
Definition: lupanels.hpp:335
std::vector< double * > diagFactBufs
Definition: lupanels.hpp:373
LUstructGPU_t A_gpu
Definition: lupanels.hpp:406
int_t lookAheadUpdate(int_t k, int_t laIdx, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:407
int64_t * d_lblock_start_offsets
Definition: lupanels.hpp:416
int * info
Definition: lupanels.hpp:344
int_t maxLvl
Definition: lupanels.hpp:356
std::vector< bcastStruct > bcastUval
Definition: lupanels.hpp:391
std::vector< int_t > LidxSendCounts
Definition: lupanels.hpp:384
int_t * computeIndirectMap(indirectMapType direction, int_t srcLen, int_t *srcVec, int_t dstLen, int_t *dstVec)
Definition: lupanels.cpp:292
int_t nsupers
Definition: lupanels.hpp:337
int_t Pr
Definition: lupanels.hpp:335
std::vector< bcastStruct > bcastUidx
Definition: lupanels.hpp:393
dtrf3Dpartition_t * trf3Dpartition
Definition: lupanels.hpp:355
int_t dAncestorFactorBaseline(int_t alvl, sForest_t *sforest, ddiagFactBufs_t **dFBufs, gEtreeInfo_t *gEtreeInfo, int tag_ub)
Definition: anc25d.cpp:25
int_t maxLidxCount
Definition: lupanels.hpp:370
int_t g2lRow(int_t k)
Definition: lupanels.hpp:399
int64_t * d_lblock_gid_offsets
Definition: lupanels.hpp:416
int_t iam
Definition: lupanels.hpp:335
int_t dSchurComplementUpdate(int_t k, lpanel_t &lpanel, upanel_t &upanel)
Definition: lupanels.cpp:267
gridinfo3d_t * grid3d
Definition: lupanels.hpp:333
gridinfo_t * grid
Definition: lupanels.hpp:334
int_t zRecvLPanel(int_t k0, int_t senderGrid, double alpha, double beta)
Definition: lupanels_comm3d.cpp:76
int_t Pc
Definition: lupanels.hpp:335
int_t dScatter(int_t m, int_t n, int_t gi, int_t gj, double *V, int_t ldv, int_t *srcRowList, int_t *srcColList)
Definition: lupanels.cpp:323
int * d_lblock_gid_dat
Definition: lupanels.hpp:414
Definition: util_dist.h:199
Definition: util_dist.h:101
Definition: anc25d.hpp:13
Definition: superlu_ddefs.h:340
Definition: superlu_ddefs.h:97
Definition: superlu_ddefs.h:467
Definition: superlu_ddefs.h:318
Definition: superlu_defs.h:978
Definition: superlu_defs.h:414
Definition: superlu_defs.h:404
Definition: superlu_defs.h:989
Definition: superlu_defs.h:728
int num_lookaheads
Definition: superlu_defs.h:757
Distributed SuperLU data types and function prototypes.
int dfreeDiagFactBufsArr(int mxLeafNode, ddiagFactBufs_t **dFBufs)
#define CEILING(a, b)
Definition: superlu_defs.h:277
int sp_ienv_dist(int, superlu_dist_options_t *)
Definition: sp_ienv.c:80
int64_t int_t
Definition: superlu_defs.h:119
#define PNUM(i, j, grid)
Definition: superlu_defs.h:276
int j
Definition: sutil_dist.c:287
int i
Definition: sutil_dist.c:287
#define SUPERLU_FREE(addr)
Definition: util_dist.h:54