SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
acc_aux.h
Go to the documentation of this file.
1#pragma once
2
3// #include "pdgstrf.h"
4
5
6typedef struct mdwin_t
7{
8 double cpu_bandwidth;
10 double acc_async_cost;
11
12
14 double frac;
15
16 double CpuDgemmLookUp[8][8][9];
17 double PhiDgemmLookUp[8][8][9];
18 double PhiBWLookUp[8];
19 double MicPciBandwidth[18];
20 double MicScatterBW[24][24];
21
22#ifdef OFFLOAD_PROFILE
23 double MicScatterTLI[MAX_BLOCK_SIZE / STEPPING][MAX_BLOCK_SIZE / STEPPING];
24 double host_scu_flop_rate[CBLOCK / CSTEPPING][CBLOCK / CSTEPPING][CBLOCK / CSTEPPING];
25#endif
27
30
/*
 * Cost-model entry points; all of these are defined in acc_aux.c.
 *
 * NOTE(review): for the estimate_* functions, m, n and k are presumably
 * matrix-block dimensions and the results presumably times -- units are not
 * visible from this header; confirm against acc_aux.c.
 */

/* Takes no arguments; declared with (void) so the compiler rejects stray arguments. */
double get_acc_async_cost(void);

double estimate_acc_time(int m, int n, int k);

double estimate_acc_gemm_time(int m, int n, int k);

double estimate_acc_scatter_time(int m, int n, int k);

double estimate_cpu_time(int m, int n, int k);

/* sz is presumably a transfer size in bytes -- confirm. */
double acc_data_send_time(size_t sz);

/* my_rank is presumably the caller's process rank -- confirm. */
void LookUpTableInit(int my_rank);
44
45
46int_t fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi , int_t Rnbrow, int_t ldu_Phi);
47int_t tuned_partition(int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t* Remain_info,
48 int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi );
int int_t
Definition: superlu_defs.h:114
#define CBLOCK
Definition: util_dist.h:78
#define CSTEPPING
Definition: util_dist.h:80
double acc_data_send_time(size_t sz)
Definition: acc_aux.c:227
int_t tuned_partition(int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi)
Definition: acc_aux.c:624
struct mdwin_t mdwin_t
double estimate_cpu_time(int m, int n, int k)
Definition: acc_aux.c:214
void LookUpTableInit(int my_rank)
Definition: acc_aux.c:234
double get_acc_async_cost()
Definition: acc_aux.c:42
int_t fixed_cpu_acc_partition(Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi)
Definition: acc_aux.c:584
int_t get_max_buffer_size()
Definition: util.c:815
double estimate_acc_scatter_time(int m, int n, int k)
Definition: acc_aux.c:194
double estimate_acc_time(int m, int n, int k)
Definition: acc_aux.c:143
double estimate_acc_gemm_time(int m, int n, int k)
Definition: acc_aux.c:174
Remain_info_t
Definition: superlu_defs.h:770
Ublock_info_t
Definition: superlu_defs.h:760
mdwin_t
Definition: acc_aux.h:7
double CpuDgemmLookUp[8][8][9]
Definition: acc_aux.h:16
double frac
Definition: acc_aux.h:14
double PhiDgemmLookUp[8][8][9]
Definition: acc_aux.h:17
double acc_async_cost
Definition: acc_aux.h:10
double PhiBWLookUp[8]
Definition: acc_aux.h:18
int communication_overlap
Definition: acc_aux.h:9
int_t fixed_partition
Definition: acc_aux.h:13
double cpu_bandwidth
Definition: acc_aux.h:8
double MicPciBandwidth[18]
Definition: acc_aux.h:19
double MicScatterBW[24][24]
Definition: acc_aux.h:20