SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
acc_aux.c File Reference
#include "acc_aux.h"
#include <stdint.h>
Include dependency graph for acc_aux.c:

Macros

#define CLAMP(x, low, high)   (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))
 
#define MAX_DIM   12800
 
#define MAX_IN_DIM   256
 
#define LOG_2_MAX_IN_DIM   8
 
#define LOG_2_MAX_DIM   7
 

Functions

static double load_imb (double *A, int nthreads)
 
double get_acc_async_cost ()
 
static unsigned int next_power_2 (unsigned int v)
 
static unsigned int previous_power_2 (unsigned int v)
 
static uint32_t my_log2 (const uint32_t x)
 
static unsigned int nearst_2_100 (unsigned int v)
 
static unsigned int nearst_k (unsigned int v)
 
double estimate_acc_time (int m, int n, int k)
 
double estimate_acc_gemm_time (int m, int n, int k)
 
double estimate_acc_scatter_time (int m, int n, int k)
 
double estimate_cpu_time (int m, int n, int k)
 
double acc_data_send_time (size_t sz)
 
void LookUpTableInit (int my_rank)
 
double estimate_acc_scatter_time_strat1 (Ublock_info_t *Ublock_info, int_t nub, Remain_info_t *Lblock_info, int_t nlb)
 
int_t fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi)
 
int_t tuned_partition (int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi)
 

Variables

double cpu_bandwidth
 
int communication_overlap
 
double acc_async_cost
 
int_t fixed_partition
 
double frac
 
double CpuDgemmLookUp [8][8][9]
 
double PhiDgemmLookUp [8][8][9]
 
double PhiBWLookUp [8]
 
double MicPciBandwidth [18]
 
double MicScatterBW [24][24]
 
double l_count [24]
 
double u_count [24]
 

Macro Definition Documentation

◆ CLAMP

#define CLAMP (   x,
  low,
  high 
)    (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x)))

◆ LOG_2_MAX_DIM

#define LOG_2_MAX_DIM   7

◆ LOG_2_MAX_IN_DIM

#define LOG_2_MAX_IN_DIM   8

◆ MAX_DIM

#define MAX_DIM   12800

◆ MAX_IN_DIM

#define MAX_IN_DIM   256

Function Documentation

◆ acc_data_send_time()

double acc_data_send_time ( size_t  sz)
Here is the call graph for this function:

◆ estimate_acc_gemm_time()

double estimate_acc_gemm_time ( int  m,
int  n,
int  k 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ estimate_acc_scatter_time()

double estimate_acc_scatter_time ( int  m,
int  n,
int  k 
)
Here is the call graph for this function:

◆ estimate_acc_scatter_time_strat1()

double estimate_acc_scatter_time_strat1 ( Ublock_info_t *  Ublock_info,
int_t  nub,
Remain_info_t *  Lblock_info,
int_t  nlb 
)
Here is the caller graph for this function:

◆ estimate_acc_time()

double estimate_acc_time ( int  m,
int  n,
int  k 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ estimate_cpu_time()

double estimate_cpu_time ( int  m,
int  n,
int  k 
)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fixed_cpu_acc_partition()

int_t fixed_cpu_acc_partition ( Ublock_info_t *  Ublock_info_Phi,
int_t  num_u_blks_Phi,
int_t  Rnbrow,
int_t  ldu_Phi 
)
Here is the call graph for this function:

◆ get_acc_async_cost()

double get_acc_async_cost ( )
Here is the caller graph for this function:

◆ load_imb()

static double load_imb ( double *  A,
int  nthreads 
)
inline static

◆ LookUpTableInit()

void LookUpTableInit ( int  my_rank)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ my_log2()

static uint32_t my_log2 ( const uint32_t  x)
inline static
Here is the caller graph for this function:

◆ nearst_2_100()

static unsigned int nearst_2_100 ( unsigned int  v)
inline static
Here is the call graph for this function:
Here is the caller graph for this function:

◆ nearst_k()

static unsigned int nearst_k ( unsigned int  v)
inline static
Here is the call graph for this function:
Here is the caller graph for this function:

◆ next_power_2()

static unsigned int next_power_2 ( unsigned int  v)
inline static

◆ previous_power_2()

static unsigned int previous_power_2 ( unsigned int  v)
inline static

◆ tuned_partition()

int_t tuned_partition ( int_t  num_u_blks_Phi,
Ublock_info_t *  Ublock_info_Phi,
Remain_info_t *  Remain_info,
int_t  RemainBlk,
double  cpu_time_0,
int_t  Rnbrow,
int_t  ldu_Phi 
)
Here is the call graph for this function:
Here is the caller graph for this function:

Variable Documentation

◆ acc_async_cost

double acc_async_cost

◆ communication_overlap

int communication_overlap

◆ cpu_bandwidth

double cpu_bandwidth

◆ CpuDgemmLookUp

double CpuDgemmLookUp[8][8][9]

◆ fixed_partition

int_t fixed_partition

◆ frac

double frac

◆ l_count

double l_count[24]

◆ MicPciBandwidth

double MicPciBandwidth[18]

◆ MicScatterBW

double MicScatterBW[24][24]

◆ PhiBWLookUp

double PhiBWLookUp[8]

◆ PhiDgemmLookUp

double PhiDgemmLookUp[8][8][9]

◆ u_count

double u_count[24]