SuperLU Distributed 9.0.0
gpu3d
batch_factorize.h
Go to the documentation of this file.
1#ifndef __SUPERLU_BATCH_FACTORIZE_H__
2#define __SUPERLU_BATCH_FACTORIZE_H__
3
4#include "superlu_ddefs.h"
5//#include "superlu_summit.h"
6
7#ifdef HAVE_MAGMA
8#include "magma.h"
9#define BatchDim_t magma_int_t
10#else
11#define BatchDim_t int_t
12#endif
13
14// Device memory used to store marshalled batch data for LU and TRSM
16{
19
20 // Diagonal device pointer data
21 double **dev_diag_ptrs;
23
24 // TRSM panel device pointer data
27
28 // Maximum diagonal and panel sizes over the marshalled device data
30
31 // Number of marshalled operations
33
34 void setBatchSize(BatchDim_t batch_size);
35};
36
37// Device memory used to store marshalled batch data for Schur complement update
39{
42
43 // GEMM device pointer data
47
48 // Panel device pointer data and scu loop limits
50
51 // Maximum m, n and k dimensions over the marshalled GEMM device data
53
54 // Maximum i and j loop lengths over the marshalled loop limits
56
57 // Number of marshalled operations
59
60 void setBatchSize(BatchDim_t batch_size);
61};
62
64 // Library handles
65#ifdef HAVE_MAGMA
66 magma_queue_t magma_queue;
67#endif
68 cudaStream_t stream;
69 cublasHandle_t cuhandle;
70
71 // Marshalled batch data for LU/TRSM and for the Schur complement update
74
75 // GPU copy of the supernode data
78
79 // GPU copy of the local LU data
81
82 // GPU buffers for the SCU gemms
85
86 // Copy of the lower panel index data in a more parallel friendly format
91};
92
93#endif
#define BatchDim_t
Definition: batch_factorize.h:11
Definition: batch_factorize.h:63
double ** gemm_buff_ptrs
Definition: batch_factorize.h:83
int64_t total_l_blocks
Definition: batch_factorize.h:90
cublasHandle_t cuhandle
Definition: batch_factorize.h:69
int_t ldt
Definition: batch_factorize.h:77
int_t * d_lblock_start_dat
Definition: batch_factorize.h:88
int_t ** d_lblock_start_ptrs
Definition: batch_factorize.h:88
int64_t * d_lblock_start_offsets
Definition: batch_factorize.h:89
int_t maxSuperSize
Definition: batch_factorize.h:77
BatchSCUMarshallData sc_marshall_data
Definition: batch_factorize.h:73
dLocalLU_t d_localLU
Definition: batch_factorize.h:80
int64_t * d_lblock_gid_offsets
Definition: batch_factorize.h:89
int_t * d_lblock_gid_dat
Definition: batch_factorize.h:87
int_t nsupers
Definition: batch_factorize.h:77
int64_t * gemm_buff_offsets
Definition: batch_factorize.h:84
cudaStream_t stream
Definition: batch_factorize.h:68
int64_t total_start_size
Definition: batch_factorize.h:90
int_t ** d_lblock_gid_ptrs
Definition: batch_factorize.h:87
int_t * xsup
Definition: batch_factorize.h:76
int_t * perm_c_supno
Definition: batch_factorize.h:76
BatchLUMarshallData marshall_data
Definition: batch_factorize.h:72
double * gemm_buff_base
Definition: batch_factorize.h:83
Definition: batch_factorize.h:16
BatchDim_t max_diag
Definition: batch_factorize.h:29
void setBatchSize(BatchDim_t batch_size)
BatchDim_t batchsize
Definition: batch_factorize.h:32
double ** dev_diag_ptrs
Definition: batch_factorize.h:21
BatchDim_t max_panel
Definition: batch_factorize.h:29
BatchDim_t * dev_panel_dim_array
Definition: batch_factorize.h:26
BatchDim_t * dev_info_array
Definition: batch_factorize.h:22
BatchDim_t * dev_diag_ld_array
Definition: batch_factorize.h:22
BatchDim_t * dev_panel_ld_array
Definition: batch_factorize.h:26
BatchDim_t * dev_diag_dim_array
Definition: batch_factorize.h:22
double ** dev_panel_ptrs
Definition: batch_factorize.h:25
Definition: batch_factorize.h:39
BatchDim_t * dev_ist
Definition: batch_factorize.h:49
BatchDim_t max_ilen
Definition: batch_factorize.h:55
BatchDim_t * dev_iend
Definition: batch_factorize.h:49
BatchDim_t * dev_jend
Definition: batch_factorize.h:49
void setBatchSize(BatchDim_t batch_size)
BatchDim_t * dev_jst
Definition: batch_factorize.h:49
double ** dev_C_ptrs
Definition: batch_factorize.h:44
BatchDim_t * dev_k_array
Definition: batch_factorize.h:46
double ** dev_A_ptrs
Definition: batch_factorize.h:44
BatchDim_t max_jlen
Definition: batch_factorize.h:55
BatchDim_t * dev_n_array
Definition: batch_factorize.h:46
BatchDim_t max_m
Definition: batch_factorize.h:52
BatchDim_t * dev_m_array
Definition: batch_factorize.h:46
BatchDim_t max_k
Definition: batch_factorize.h:52
BatchDim_t * dev_ldb_array
Definition: batch_factorize.h:45
BatchDim_t max_n
Definition: batch_factorize.h:52
BatchDim_t * dev_lda_array
Definition: batch_factorize.h:45
BatchDim_t batchsize
Definition: batch_factorize.h:58
BatchDim_t * dev_ldc_array
Definition: batch_factorize.h:45
double ** dev_B_ptrs
Definition: batch_factorize.h:44
Definition: superlu_ddefs.h:97
Distributed SuperLU data types and function prototypes.
int64_t int_t
Definition: superlu_defs.h:119