SuperLU Distributed 9.0.0
gpu3d
u_panels_impl.hpp
Go to the documentation of this file.
1#pragma once
2#include "lupanels.hpp"
3
4template <typename Ftype>
6{
7 int_t kSupSz = SuperSize(k);
8 int_t kLastRow = xsup[k + 1];
9 /* compute number of columns */
10 int_t nonZeroCols = 0;
11 int_t usubPtr = BR_HEADER;
12 int_t nub = usub[0];
13
14 for (int_t ub = 0; ub < nub; ub++)
15 {
16 int_t gblockId = usub[usubPtr];
17 int_t gsupc = SuperSize(gblockId);
18 for (int_t col = 0; col < gsupc; col++)
19 {
20 int_t segsize = kLastRow - usub[usubPtr + UB_DESCRIPTOR + col];
21 if (segsize)
22 nonZeroCols++;
23 }
24
25 usubPtr += UB_DESCRIPTOR + gsupc;
26 }
27
28 int_t uIndexSize = UPANEL_HEADER_SIZE + 2 * nub + 1 + nonZeroCols;
29 //Allocating the index and val
30 index = (int_t*) SUPERLU_MALLOC(sizeof(int_t) * uIndexSize);
31 val = (Ftype *)SUPERLU_MALLOC(sizeof(Ftype) * nonZeroCols * kSupSz);
32 index[0] = nub;
33 index[1] = nonZeroCols;
34 index[2] = kSupSz;
35 index[UPANEL_HEADER_SIZE + nub] = 0; // starting of prefix sum is zero
36 // now start the loop
37 int_t blkIdPtr = UPANEL_HEADER_SIZE;
38 int_t pxSumPtr = UPANEL_HEADER_SIZE + nub + 1;
39 int_t colIdxPtr = UPANEL_HEADER_SIZE + 2 * nub + 1;
40 int_t srcUvalPtr = 0;
41 int_t dstUvalPtr = 0;
42 // reset the USUB ptr
43 usubPtr = BR_HEADER;
44 for (int_t ub = 0; ub < nub; ub++)
45 {
46 int_t gblockId = usub[usubPtr];
47 index[blkIdPtr++] = gblockId;
48 int_t local_nzcols = 0;
49 int_t gsupc = SuperSize(gblockId);
50 for (int_t col = 0; col < gsupc; col++)
51 {
52 int_t segsize = kLastRow - usub[usubPtr + UB_DESCRIPTOR + col];
53 if (segsize)
54 {
55 for(int row=0; row<kSupSz; row++)
56 {
57 if(row<kSupSz-segsize)
58 val[dstUvalPtr++] = zeroT<Ftype>();
59 else
60 val[dstUvalPtr++] =uval[srcUvalPtr++];
61 }
62
63 index[colIdxPtr++] = col;
64 local_nzcols++;
65 }
66 }
67 index[pxSumPtr] = index[pxSumPtr - 1] + local_nzcols;
68 pxSumPtr++;
69 usubPtr += UB_DESCRIPTOR + gsupc;
70 }
71
72 return;
73}
74
75template <typename Ftype>
77{
78 int_t kSupSz = SuperSize(k);
79 int_t kLastRow = xsup[k + 1];
80 int_t srcUvalPtr = 0;
81 int_t dstUvalPtr = 0;
82 // reset the USUB ptr
83 int_t usubPtr = BR_HEADER;
84 int_t nub = nblocks();
85
86 for (int_t ub = 0; ub < nub; ub++)
87 {
88 int_t gblockId = usub[usubPtr];
89 int_t gsupc = SuperSize(gblockId);
90 for (int_t col = 0; col < gsupc; col++)
91 {
92 int_t segsize = kLastRow - usub[usubPtr + UB_DESCRIPTOR + col];
93 if (segsize)
94 {
95 for(int row=0; row<kSupSz; row++)
96 {
97 if(row<kSupSz-segsize)
98 dstUvalPtr++;
99 else
100 uval[srcUvalPtr++] =val[dstUvalPtr++];
101 }
102
103 }
104 }
105
106 usubPtr += UB_DESCRIPTOR + gsupc;
107 }
108 return 0;
109}
110
111template <typename Ftype>
113{
114 //TODO: possible to optimize
115 for (int_t i = 0; i < nblocks(); i++)
116 {
117 if (k == gid(i))
118 return i;
119 }
120 //TODO: it shouldn't come here
122}
123template <typename Ftype>
124int_t xupanel_t<Ftype>::panelSolve(int_t ksupsz, Ftype *DiagBlk, int_t LDD)
125{
126 if (isEmpty()) return 0;
127
128 superlu_trsm<Ftype>("L", "L", "N", "U",
129 ksupsz, nzcols(), one<Ftype>(), DiagBlk, LDD, val, LDA());
130 return 0;
131}
132
133template <typename Ftype>
134int xupanel_t<Ftype>::getEndBlock(int iSt, int maxCols)
135{
136 int nlb = nblocks();
137 if(iSt >= nlb )
138 return nlb;
139 int iEnd = iSt;
140 int ii = iSt +1;
141
142 while (
143 stCol(ii) - stCol(iSt) <= maxCols &&
144 ii < nlb)
145 ii++;
146
147#if 1
148 if (stCol(ii) - stCol(iSt) > maxCols)
149 iEnd = ii-1;
150 else
151 iEnd =ii;
152#else
153 if (ii == nlb)
154 {
155 if (stCol(ii) - stCol(iSt) <= maxCols)
156 iEnd = nlb;
157 else
158 iEnd = nlb - 1;
159 }
160 else
161 iEnd = ii - 1;
162#endif
163 return iEnd;
164}
int_t find(int_t k)
Definition: u_panels_impl.hpp:112
int_t packed2skyline(int_t k, int_t *usub, Ftype *uval, int_t *xsup)
Definition: u_panels_impl.hpp:76
int getEndBlock(int jSt, int maxCols)
Definition: u_panels_impl.hpp:134
xupanel_t()
Definition: xlupanels.hpp:194
int_t panelSolve(int_t ksupsz, Ftype *DiagBlk, int_t LDD)
Definition: u_panels_impl.hpp:124
#define UPANEL_HEADER_SIZE
Definition: lu_common.hpp:7
#define GLOBAL_BLOCK_NOT_FOUND
Definition: lupanels.hpp:16
integer, parameter, public row
Definition: superlupara.f90:35
integer, parameter, public col
Definition: superlupara.f90:35
integer, parameter, public usub
Definition: superlupara.f90:35
#define SuperSize(bnum)
Definition: superlu_defs.h:271
#define BR_HEADER
Definition: superlu_defs.h:200
int64_t int_t
Definition: superlu_defs.h:119
#define UB_DESCRIPTOR
Definition: superlu_defs.h:201
int i
Definition: sutil_dist.c:287
#define SUPERLU_MALLOC(size)
Definition: util_dist.h:48