SuperLU Distributed
8.2.1
Distributed memory sparse direct solver
gpu_wrapper.h
Go to the documentation of this file.
1
// #ifndef __SUPERLU_GPUWRAPER /* allow multiple inclusions */
2
// #define __SUPERLU_GPUWRAPER
3
4
5
#ifdef HAVE_CUDA
6
#include <cublas_v2.h>
7
#include "cuda.h"
8
#include "cuda_runtime_api.h"
9
#include "cuda_runtime.h"
10
#include <cusparse.h>
11
#include <cuda_profiler_api.h>
12
13
/*
 * CUDA backend (this branch is selected by HAVE_CUDA): vendor-neutral gpu*
 * names for runtime types, functions and enumerators. Each macro is a plain
 * token alias for the cuda* identifier on its right-hand side; the HAVE_HIP
 * branch of this header defines the same gpu* names for HIP.
 */
#define gpuDeviceProp cudaDeviceProp
14
#define gpuGetDeviceCount cudaGetDeviceCount
15
#define gpuGetDeviceProperties cudaGetDeviceProperties
16
#define gpuSetDevice cudaSetDevice
17
#define gpuGetDevice cudaGetDevice
18
#define gpuError_t cudaError_t
19
#define gpuSuccess cudaSuccess
20
#define gpuGetErrorString cudaGetErrorString
21
#define gpuMalloc cudaMalloc
22
/* Note: gpuHostMalloc and gpuMallocHost (further down) are two separate
   aliases, mapped here to cudaHostAlloc and cudaMallocHost respectively. */
#define gpuHostMalloc cudaHostAlloc
23
#define gpuHostMallocDefault cudaHostAllocDefault
24
#define gpuMallocManaged cudaMallocManaged
25
#define gpuStream_t cudaStream_t
26
#define gpuStreamCreate cudaStreamCreate
27
#define gpuMemcpyAsync cudaMemcpyAsync
28
#define gpuMemcpy2DAsync cudaMemcpy2DAsync
29
#define gpuFreeHost cudaFreeHost
30
#define gpuFree cudaFree
31
#define gpuMemPrefetchAsync cudaMemPrefetchAsync
32
#define gpuStreamSynchronize cudaStreamSynchronize
33
#define gpuDeviceSynchronize cudaDeviceSynchronize
34
#define gpuMemcpy cudaMemcpy
35
#define gpuMemAttachGlobal cudaMemAttachGlobal
36
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
37
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
38
#define gpuStreamCreateWithFlags cudaStreamCreateWithFlags
39
#define gpuStreamDefault cudaStreamDefault
40
/* BLAS status type; the gpublasCheckErrors macro in this header declares a
   variable of this type. */
#define gpublasStatus_t cublasStatus_t
41
#define gpuEventCreate cudaEventCreate
42
#define gpuEventRecord cudaEventRecord
43
#define gpuMemGetInfo cudaMemGetInfo
44
#define gpuOccupancyMaxPotentialBlockSize cudaOccupancyMaxPotentialBlockSize
45
#define gpuEventElapsedTime cudaEventElapsedTime
46
#define gpuDeviceReset cudaDeviceReset
47
#define gpuMallocHost cudaMallocHost
48
#define gpuEvent_t cudaEvent_t
49
#define gpuMemset cudaMemset
50
/*
 * CUDA backend: vendor-neutral names for the BLAS library. GPUBLAS_STATUS_*
 * alias the CUBLAS_STATUS_* status codes one-for-one; GPUBLAS_STATUS_SUCCESS
 * is the value gpublasCheckErrors compares against.
 */
#define GPUBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
51
#define GPUBLAS_STATUS_NOT_INITIALIZED CUBLAS_STATUS_NOT_INITIALIZED
52
#define GPUBLAS_STATUS_ALLOC_FAILED CUBLAS_STATUS_ALLOC_FAILED
53
#define GPUBLAS_STATUS_INVALID_VALUE CUBLAS_STATUS_INVALID_VALUE
54
#define GPUBLAS_STATUS_ARCH_MISMATCH CUBLAS_STATUS_ARCH_MISMATCH
55
#define GPUBLAS_STATUS_MAPPING_ERROR CUBLAS_STATUS_MAPPING_ERROR
56
#define GPUBLAS_STATUS_EXECUTION_FAILED CUBLAS_STATUS_EXECUTION_FAILED
57
#define GPUBLAS_STATUS_INTERNAL_ERROR CUBLAS_STATUS_INTERNAL_ERROR
58
#define GPUBLAS_STATUS_LICENSE_ERROR CUBLAS_STATUS_LICENSE_ERROR
59
#define GPUBLAS_STATUS_NOT_SUPPORTED CUBLAS_STATUS_NOT_SUPPORTED
60
/* Handle management and stream binding. */
#define gpublasCreate cublasCreate
61
#define gpublasDestroy cublasDestroy
62
#define gpublasHandle_t cublasHandle_t
63
#define gpublasSetStream cublasSetStream
64
/* GEMM entry points, one alias per D/S/Z/C prefix. */
#define gpublasDgemm cublasDgemm
65
#define gpublasSgemm cublasSgemm
66
#define gpublasZgemm cublasZgemm
67
#define gpublasCgemm cublasCgemm
68
/* Only the OP_N operation enumerator is aliased in this header. */
#define GPUBLAS_OP_N CUBLAS_OP_N
69
#define gpuDoubleComplex cuDoubleComplex
70
/* Runtime (not BLAS) version query; listed here in the original order. */
#define gpuRuntimeGetVersion cudaRuntimeGetVersion
71
/*
 * CUDA backend: underscore-style names for the x and y components of the
 * threadIdx / blockIdx / blockDim / gridDim built-ins. The HAVE_HIP branch
 * defines the same underscore names, so device code written against them
 * uses one spelling for both backends. No z-component aliases are defined.
 */
#define threadIdx_x threadIdx.x
72
#define threadIdx_y threadIdx.y
73
#define blockIdx_x blockIdx.x
74
#define blockIdx_y blockIdx.y
75
#define blockDim_x blockDim.x
76
#define blockDim_y blockDim.y
77
#define gridDim_x gridDim.x
78
#define gridDim_y gridDim.y
79
80
81
82
83
#elif defined(HAVE_HIP)
84
85
#include "hip/hip_runtime_api.h"
86
#include "hip/hip_runtime.h"
87
#include "hipblas.h"
88
89
// #include "roctracer_ext.h" // need to pass the include dir directly to HIP_HIPCC_FLAGS
90
// // roctx header file
91
// #include <roctx.h>
92
93
/*
 * HIP backend (this branch is selected by HAVE_HIP when HAVE_CUDA is not
 * defined): the same vendor-neutral gpu* names as the CUDA branch, each a
 * plain token alias for the hip* identifier on its right-hand side.
 */
#define gpuDeviceProp hipDeviceProp_t
94
#define gpuGetDeviceCount hipGetDeviceCount
95
#define gpuGetDeviceProperties hipGetDeviceProperties
96
#define gpuSetDevice hipSetDevice
97
#define gpuGetDevice hipGetDevice
98
#define gpuError_t hipError_t
99
#define gpuSuccess hipSuccess
100
#define gpuGetErrorString hipGetErrorString
101
#define gpuMalloc hipMalloc
102
/* Note: gpuHostMalloc and gpuMallocHost (further down) both alias
   hipHostMalloc in this branch. */
#define gpuHostMalloc hipHostMalloc
103
#define gpuHostMallocDefault hipHostMallocDefault
104
#define gpuMallocManaged hipMallocManaged
105
#define gpuStream_t hipStream_t
106
#define gpuStreamCreate hipStreamCreate
107
#define gpuMemcpyAsync hipMemcpyAsync
108
#define gpuMemcpy2DAsync hipMemcpy2DAsync
109
#define gpuFreeHost hipHostFree
110
#define gpuFree hipFree
111
#define gpuMemPrefetchAsync hipMemPrefetchAsync
// NOTE(review): the original author marked the gpuMemPrefetchAsync mapping
// above as "not sure about this" -- confirm hipMemPrefetchAsync is the
// intended counterpart before relying on it.
112
#define gpuStreamSynchronize hipStreamSynchronize
113
#define gpuDeviceSynchronize hipDeviceSynchronize
114
#define gpuMemcpy hipMemcpy
115
#define gpuMemAttachGlobal hipMemAttachGlobal
116
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
117
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
118
#define gpuStreamCreateWithFlags hipStreamCreateWithFlags
119
#define gpuStreamDefault hipStreamDefault
120
/* BLAS status type; the gpublasCheckErrors macro in this header declares a
   variable of this type. */
#define gpublasStatus_t hipblasStatus_t
121
#define gpuEventCreate hipEventCreate
122
#define gpuEventRecord hipEventRecord
123
#define gpuMemGetInfo hipMemGetInfo
124
#define gpuOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
125
#define gpuEventElapsedTime hipEventElapsedTime
126
#define gpuDeviceReset hipDeviceReset
127
#define gpuMallocHost hipHostMalloc
128
#define gpuEvent_t hipEvent_t
129
#define gpuMemset hipMemset
130
/*
 * HIP backend: vendor-neutral names for the BLAS library. GPUBLAS_STATUS_*
 * alias the HIPBLAS_STATUS_* status codes one-for-one;
 * GPUBLAS_STATUS_SUCCESS is the value gpublasCheckErrors compares against.
 */
#define GPUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
131
#define GPUBLAS_STATUS_NOT_INITIALIZED HIPBLAS_STATUS_NOT_INITIALIZED
132
#define GPUBLAS_STATUS_ALLOC_FAILED HIPBLAS_STATUS_ALLOC_FAILED
133
#define GPUBLAS_STATUS_INVALID_VALUE HIPBLAS_STATUS_INVALID_VALUE
134
#define GPUBLAS_STATUS_ARCH_MISMATCH HIPBLAS_STATUS_ARCH_MISMATCH
135
#define GPUBLAS_STATUS_MAPPING_ERROR HIPBLAS_STATUS_MAPPING_ERROR
136
#define GPUBLAS_STATUS_EXECUTION_FAILED HIPBLAS_STATUS_EXECUTION_FAILED
137
#define GPUBLAS_STATUS_INTERNAL_ERROR HIPBLAS_STATUS_INTERNAL_ERROR
138
#define GPUBLAS_STATUS_LICENSE_ERROR HIPBLAS_STATUS_LICENSE_ERROR
139
#define GPUBLAS_STATUS_NOT_SUPPORTED HIPBLAS_STATUS_NOT_SUPPORTED
140
/* Handle management and stream binding. */
#define gpublasCreate hipblasCreate
141
#define gpublasDestroy hipblasDestroy
142
#define gpublasHandle_t hipblasHandle_t
143
#define gpublasSetStream hipblasSetStream
144
/* GEMM entry points, one alias per D/S/Z/C prefix. */
#define gpublasDgemm hipblasDgemm
145
#define gpublasSgemm hipblasSgemm
146
#define gpublasZgemm hipblasZgemm
147
#define gpublasCgemm hipblasCgemm
148
/* Only the OP_N operation enumerator is aliased in this header. */
#define GPUBLAS_OP_N HIPBLAS_OP_N
149
#define gpuDoubleComplex hipblasDoubleComplex
150
/* Runtime (not BLAS) version query; listed here in the original order. */
#define gpuRuntimeGetVersion hipRuntimeGetVersion
151
/*
 * HIP backend: underscore-style thread/block/grid index names, matching the
 * names the HAVE_CUDA branch defines. Each maps to the corresponding
 * hipThreadIdx_ / hipBlockIdx_ / hipBlockDim_ / hipGridDim_ identifier
 * (presumably supplied by the HIP runtime headers included above -- confirm
 * against the HIP version in use). Only x and y components are aliased.
 */
#define threadIdx_x hipThreadIdx_x
152
#define threadIdx_y hipThreadIdx_y
153
#define blockIdx_x hipBlockIdx_x
154
#define blockIdx_y hipBlockIdx_y
155
#define blockDim_x hipBlockDim_x
156
#define blockDim_y hipBlockDim_y
157
#define gridDim_x hipGridDim_x
158
#define gridDim_y hipGridDim_y
159
160
161
#endif
162
163
164
/*
 * gpublasCheckErrors(fn) -- evaluate a gpublas call once and abort on failure.
 *
 *   fn : expression yielding a gpublasStatus_t (evaluated exactly once).
 *
 * If the resulting status differs from GPUBLAS_STATUS_SUCCESS, the numeric
 * status and the call site (__FILE__, __LINE__) are printed to stderr and the
 * process terminates via exit(1). On success nothing else happens.
 *
 * The expansion is a single do { ... } while (0) statement WITHOUT a trailing
 * semicolon: write the invocation as an ordinary statement ending in ';'.
 * This keeps the macro usable as the body of an unbraced if/else.
 *
 * Uses fprintf and exit, so <stdio.h> and <stdlib.h> must be visible at the
 * point of use.
 */
#define gpublasCheckErrors(fn) \
    do { \
        /* Parenthesize the argument so any expression binds as a whole. */ \
        gpublasStatus_t gpublas_status_ = (fn); \
        if (gpublas_status_ != GPUBLAS_STATUS_SUCCESS) { \
            fprintf(stderr, "Fatal gpublas error: %d (at %s:%d)\n", \
                    (int)(gpublas_status_), \
                    __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)
SRC
BAK
gpu_wrapper.h
Generated by
1.9.4