|
SuperLU Distributed 9.0.0
gpu3d
|

Macros | |
| #define | CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x))) |
| #define | MAX_DIM 12800 |
| #define | MAX_IN_DIM 256 |
| #define | LOG_2_MAX_IN_DIM 8 |
| #define | LOG_2_MAX_DIM 7 |
Functions | |
| static double | load_imb (double *A, int nthreads) |
| double | get_acc_async_cost () |
| static unsigned int | next_power_2 (unsigned int v) |
| static unsigned int | previous_power_2 (unsigned int v) |
| static uint32_t | my_log2 (const uint32_t x) |
| static unsigned int | nearst_2_100 (unsigned int v) |
| static unsigned int | nearst_k (unsigned int v) |
| double | estimate_acc_time (int m, int n, int k) |
| double | estimate_acc_gemm_time (int m, int n, int k) |
| double | estimate_acc_scatter_time (int m, int n, int k) |
| double | estimate_cpu_time (int m, int n, int k) |
| double | acc_data_send_time (size_t sz) |
| void | LookUpTableInit (int my_rank) |
| double | estimate_acc_scatter_time_strat1 (Ublock_info_t *Ublock_info, int_t nub, Remain_info_t *Lblock_info, int_t nlb) |
| int_t | fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi) |
| int_t | tuned_partition (int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi) |
Variables | |
| double | cpu_bandwidth |
| int | communication_overlap |
| double | acc_async_cost |
| int_t | fixed_partition |
| double | frac |
| double | CpuDgemmLookUp [8][8][9] |
| double | PhiDgemmLookUp [8][8][9] |
| double | PhiBWLookUp [8] |
| double | MicPciBandwidth [18] |
| double | MicScatterBW [24][24] |
| double | l_count [24] |
| double | u_count [24] |
| #define CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x))) |
| #define LOG_2_MAX_DIM 7 |
| #define LOG_2_MAX_IN_DIM 8 |
| #define MAX_DIM 12800 |
| #define MAX_IN_DIM 256 |
| double acc_data_send_time | ( | size_t | sz | ) |

| double estimate_acc_gemm_time | ( | int | m, |
| int | n, | ||
| int | k | ||
| ) |


| double estimate_acc_scatter_time | ( | int | m, |
| int | n, | ||
| int | k | ||
| ) |

| double estimate_acc_scatter_time_strat1 | ( | Ublock_info_t * | Ublock_info, |
| int_t | nub, | ||
| Remain_info_t * | Lblock_info, | ||
| int_t | nlb | ||
| ) |

| double estimate_acc_time | ( | int | m, |
| int | n, | ||
| int | k | ||
| ) |


| double estimate_cpu_time | ( | int | m, |
| int | n, | ||
| int | k | ||
| ) |


| int_t fixed_cpu_acc_partition | ( | Ublock_info_t * | Ublock_info_Phi, |
| int_t | num_u_blks_Phi, | ||
| int_t | Rnbrow, | ||
| int_t | ldu_Phi | ||
| ) |

| double get_acc_async_cost | ( | ) |

| static double load_imb | ( | double * | A, |
| int | nthreads | ||
| ) |
inline static
| void LookUpTableInit | ( | int | my_rank | ) |


| static uint32_t my_log2 | ( | const uint32_t | x | ) |
inline static

| static unsigned int nearst_2_100 | ( | unsigned int | v | ) |
inline static


| static unsigned int nearst_k | ( | unsigned int | v | ) |
inline static


| static unsigned int next_power_2 | ( | unsigned int | v | ) |
inline static
| static unsigned int previous_power_2 | ( | unsigned int | v | ) |
inline static
| int_t tuned_partition | ( | int_t | num_u_blks_Phi, |
| Ublock_info_t * | Ublock_info_Phi, | ||
| Remain_info_t * | Remain_info, | ||
| int_t | RemainBlk, | ||
| double | cpu_time_0, | ||
| int_t | Rnbrow, | ||
| int_t | ldu_Phi | ||
| ) |

| double acc_async_cost |
| int communication_overlap |
| double cpu_bandwidth |
| double CpuDgemmLookUp[8][8][9] |
| int_t fixed_partition |
| double frac |
| double l_count[24] |
| double MicPciBandwidth[18] |
| double MicScatterBW[24][24] |
| double PhiBWLookUp[8] |
| double PhiDgemmLookUp[8][8][9] |
| double u_count[24] |