1#ifndef __SUPERLU_BATCH_FACTORIZE_MARSHALL_H__
2#define __SUPERLU_BATCH_FACTORIZE_MARSHALL_H__
36 if(Lnzval && Lrowind_bc)
87 if(Ucolind_br && Unzval && Lrowind_bc && Lnzval)
89 int upanel_rows = Ucolind_br[2];
90 int sup_offset = ksupc - upanel_rows;
96 diag_ptrs[
i] = Lnzval + sup_offset + sup_offset * Lrowind_bc[1];
141 if(Lnzval && Lrowind_bc)
144 int_t nzrows = Lrowind_bc[1];
145 int_t len = nzrows - diag_block_offset;
211 if(Ucolind_br && Unzval && Lrowind_bc && Lnzval)
213 int upanel_rows = Ucolind_br[2];
214 int sup_offset = ksupc - upanel_rows;
217 int_t L_nzrows = Lrowind_bc[1];
218 int_t L_len = L_nzrows - diag_block_offset;
220 A_ptrs[
i] = Lnzval + diag_block_offset + sup_offset * L_nzrows;
234 iend[
i] = Lrowind_bc[0];
235 jend[
i] = Ucolind_br[0];
246template <
class T,
class offT>
259 inline __host__ __device__
void operator()(
const offT &index)
const
265template<
class T,
class offT>
271 thrust::system::cuda::par, thrust::counting_iterator<offT>(0),
272 thrust::counting_iterator<offT>(num_arrays), offset_ptr_functor
288 return end[x] -
st[x];
#define BatchDim_t
Definition: batch_factorize.h:11
void generateOffsetPointers(T *base_mem, offT *offsets, T **ptrs, size_t num_arrays)
Definition: batch_factorize_marshall.h:266
Definition: batch_factorize_marshall.h:9
int_t k_st
Definition: batch_factorize_marshall.h:12
double ** Lnzval_bc_ptr
Definition: batch_factorize_marshall.h:11
int_t * xsup
Definition: batch_factorize_marshall.h:12
BatchDim_t * dim_batch
Definition: batch_factorize_marshall.h:10
double ** diag_ptrs
Definition: batch_factorize_marshall.h:11
__device__ void operator()(const int_t &i) const
Definition: batch_factorize_marshall.h:30
MarshallLUFunc_flat(int_t k_st, double **diag_ptrs, BatchDim_t *ld_batch, BatchDim_t *dim_batch, double **Lnzval_bc_ptr, int_t **Lrowind_bc_ptr, int_t *dperm_c_supno, int_t *xsup)
Definition: batch_factorize_marshall.h:14
BatchDim_t * ld_batch
Definition: batch_factorize_marshall.h:10
int_t * dperm_c_supno
Definition: batch_factorize_marshall.h:12
int_t ** Lrowind_bc_ptr
Definition: batch_factorize_marshall.h:12
Definition: batch_factorize_marshall.h:163
BatchDim_t * ist
Definition: batch_factorize_marshall.h:168
int_t ** Lrowind_bc_ptr
Definition: batch_factorize_marshall.h:167
MarshallSCUFunc_flat(int_t k_st, double **A_ptrs, BatchDim_t *lda_array, double **B_ptrs, BatchDim_t *ldb_array, double **C_ptrs, BatchDim_t *ldc_array, BatchDim_t *m_array, BatchDim_t *n_array, BatchDim_t *k_array, BatchDim_t *ist, BatchDim_t *iend, BatchDim_t *jst, BatchDim_t *jend, double **Unzval_br_new_ptr, int_t **Ucolind_br_ptr, double **Lnzval_bc_ptr, int_t **Lrowind_bc_ptr, int_t *dperm_c_supno, int_t *xsup, double **dgpuGemmBuffs)
Definition: batch_factorize_marshall.h:170
__device__ void operator()(const int_t &i) const
Definition: batch_factorize_marshall.h:201
BatchDim_t * ldc_array
Definition: batch_factorize_marshall.h:165
BatchDim_t * k_array
Definition: batch_factorize_marshall.h:165
int_t * xsup
Definition: batch_factorize_marshall.h:167
int_t k_st
Definition: batch_factorize_marshall.h:167
BatchDim_t * jend
Definition: batch_factorize_marshall.h:168
BatchDim_t * lda_array
Definition: batch_factorize_marshall.h:165
int_t ** Ucolind_br_ptr
Definition: batch_factorize_marshall.h:167
double ** C_ptrs
Definition: batch_factorize_marshall.h:164
BatchDim_t * n_array
Definition: batch_factorize_marshall.h:165
double ** Lnzval_bc_ptr
Definition: batch_factorize_marshall.h:166
double ** Unzval_br_new_ptr
Definition: batch_factorize_marshall.h:166
BatchDim_t * m_array
Definition: batch_factorize_marshall.h:165
int_t * dperm_c_supno
Definition: batch_factorize_marshall.h:167
BatchDim_t * ldb_array
Definition: batch_factorize_marshall.h:165
double ** B_ptrs
Definition: batch_factorize_marshall.h:164
BatchDim_t * jst
Definition: batch_factorize_marshall.h:168
BatchDim_t * iend
Definition: batch_factorize_marshall.h:168
double ** A_ptrs
Definition: batch_factorize_marshall.h:164
double ** dgpuGemmBuffs
Definition: batch_factorize_marshall.h:166
Definition: batch_factorize_marshall.h:109
BatchDim_t * panel_dim_batch
Definition: batch_factorize_marshall.h:110
int_t * xsup
Definition: batch_factorize_marshall.h:112
BatchDim_t * diag_dim_batch
Definition: batch_factorize_marshall.h:110
double ** Lnzval_bc_ptr
Definition: batch_factorize_marshall.h:111
BatchDim_t * panel_ld_batch
Definition: batch_factorize_marshall.h:110
int_t k_st
Definition: batch_factorize_marshall.h:112
MarshallTRSMLFunc_flat(int_t k_st, double **diag_ptrs, BatchDim_t *diag_ld_batch, BatchDim_t *diag_dim_batch, double **panel_ptrs, BatchDim_t *panel_ld_batch, BatchDim_t *panel_dim_batch, double **Lnzval_bc_ptr, int_t **Lrowind_bc_ptr, int_t *dperm_c_supno, int_t *xsup)
Definition: batch_factorize_marshall.h:114
__device__ void operator()(const int_t &i) const
Definition: batch_factorize_marshall.h:134
int_t * dperm_c_supno
Definition: batch_factorize_marshall.h:112
double ** panel_ptrs
Definition: batch_factorize_marshall.h:111
BatchDim_t * diag_ld_batch
Definition: batch_factorize_marshall.h:110
int_t ** Lrowind_bc_ptr
Definition: batch_factorize_marshall.h:112
double ** diag_ptrs
Definition: batch_factorize_marshall.h:111
Definition: batch_factorize_marshall.h:51
BatchDim_t * panel_ld_batch
Definition: batch_factorize_marshall.h:52
BatchDim_t * diag_ld_batch
Definition: batch_factorize_marshall.h:52
MarshallTRSMUFunc_flat(int_t k_st, double **diag_ptrs, BatchDim_t *diag_ld_batch, BatchDim_t *diag_dim_batch, double **panel_ptrs, BatchDim_t *panel_ld_batch, BatchDim_t *panel_dim_batch, double **Unzval_br_new_ptr, int_t **Ucolind_br_ptr, double **Lnzval_bc_ptr, int_t **Lrowind_bc_ptr, int_t *dperm_c_supno, int_t *xsup)
Definition: batch_factorize_marshall.h:56
int_t * xsup
Definition: batch_factorize_marshall.h:54
double ** Unzval_br_new_ptr
Definition: batch_factorize_marshall.h:53
__device__ void operator()(const int_t &i) const
Definition: batch_factorize_marshall.h:77
double ** panel_ptrs
Definition: batch_factorize_marshall.h:53
BatchDim_t * panel_dim_batch
Definition: batch_factorize_marshall.h:52
BatchDim_t * diag_dim_batch
Definition: batch_factorize_marshall.h:52
int_t ** Ucolind_br_ptr
Definition: batch_factorize_marshall.h:54
double ** diag_ptrs
Definition: batch_factorize_marshall.h:53
int_t ** Lrowind_bc_ptr
Definition: batch_factorize_marshall.h:54
double ** Lnzval_bc_ptr
Definition: batch_factorize_marshall.h:53
int_t * dperm_c_supno
Definition: batch_factorize_marshall.h:54
int_t k_st
Definition: batch_factorize_marshall.h:54
Definition: batch_factorize_marshall.h:248
UnaryOffsetPtrAssign(T *base_mem, offT *offsets, T **ptrs)
Definition: batch_factorize_marshall.h:252
T ** ptrs
Definition: batch_factorize_marshall.h:249
T * base_mem
Definition: batch_factorize_marshall.h:249
__host__ __device__ void operator()(const offT &index) const
Definition: batch_factorize_marshall.h:259
offT * offsets
Definition: batch_factorize_marshall.h:250
Definition: batch_factorize_marshall.h:278
__device__ T operator()(const T &x) const
Definition: batch_factorize_marshall.h:286
element_diff(T *st, T *end)
Definition: batch_factorize_marshall.h:280
T * st
Definition: batch_factorize_marshall.h:279
T * end
Definition: batch_factorize_marshall.h:279
Definitions which are precision-neutral.
#define SuperSize(bnum)
Definition: superlu_defs.h:271
int64_t int_t
Definition: superlu_defs.h:119
#define BC_HEADER
Definition: superlu_defs.h:198
int i
Definition: sutil_dist.c:287