/*
 * SuperLU Distributed 9.0.0 -- gpu3d -- gpu_wrapper.h
 * Source listing taken from the generated documentation of this file.
 */
#ifndef __SUPERLU_GPUWRAPPER
/* allow multiple inclusions */
22
#define __SUPERLU_GPUWRAPPER
23
24
#ifdef HAVE_CUDA
25
#include <cublas_v2.h>
26
#include "cuda.h"
27
#include "cuda_runtime_api.h"
28
#include "cuda_runtime.h"
29
#include <cusparse.h>
30
#include <cuda_profiler_api.h>
31
32
/*
 * CUDA back end: every vendor-neutral gpu* / gpublas* / GPUBLAS_* name used
 * by the library is a plain textual alias for its CUDA runtime or cuBLAS
 * counterpart.  The aliases are object-like macros, so a name that is never
 * used costs nothing and is never checked by the compiler.
 */

/* ---- Device management and queries ---- */
#define gpuDeviceProp cudaDeviceProp
#define gpuGetDeviceCount cudaGetDeviceCount
#define gpuGetDeviceProperties cudaGetDeviceProperties
#define gpuSetDevice cudaSetDevice
#define gpuGetDevice cudaGetDevice
#define gpuDeviceSynchronize cudaDeviceSynchronize
#define gpuDeviceReset cudaDeviceReset
#define gpuRuntimeGetVersion cudaRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize cudaOccupancyMaxPotentialBlockSize

/* ---- Error reporting ---- */
#define gpuError_t cudaError_t
#define gpuSuccess cudaSuccess
#define gpuGetErrorString cudaGetErrorString
#define gpuGetLastError cudaGetLastError

/* ---- Memory allocation and release ---- */
#define gpuMalloc cudaMalloc
#define gpuMallocManaged cudaMallocManaged
#define gpuMemAttachGlobal cudaMemAttachGlobal
#define gpuHostMalloc cudaHostAlloc
#define gpuHostMallocDefault cudaHostAllocDefault
#define gpuMallocHost cudaMallocHost
#define gpuFreeHost cudaFreeHost
#define gpuFree cudaFree
#define gpuMemGetInfo cudaMemGetInfo

/* ---- Memory transfer and initialization ---- */
#define gpuMemcpy cudaMemcpy
#define gpuMemcpyAsync cudaMemcpyAsync
#define gpuMemcpy2DAsync cudaMemcpy2DAsync
#define gpuMemPrefetchAsync cudaMemPrefetchAsync
#define gpuMemset cudaMemset
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost

/* ---- Streams ---- */
#define gpuStream_t cudaStream_t
#define gpuStreamCreate cudaStreamCreate
#define gpuStreamCreateWithFlags cudaStreamCreateWithFlags
#define gpuStreamDefault cudaStreamDefault
#define gpuStreamSynchronize cudaStreamSynchronize
#define gpuStreamDestroy cudaStreamDestroy
/* NOTE(review): the CUDA runtime does not appear to provide a function named
 * cudaStreamDestroyWithFlags (streams are released with cudaStreamDestroy).
 * The alias is kept for compatibility and only compiles while it is unused;
 * confirm and retire it. */
#define gpuStreamDestroyWithFlags cudaStreamDestroyWithFlags

/* ---- Events ---- */
#define gpuEvent_t cudaEvent_t
#define gpuEventCreate cudaEventCreate
#define gpuEventRecord cudaEventRecord
#define gpuEventElapsedTime cudaEventElapsedTime

/* ---- BLAS: status codes ---- */
#define gpublasStatus_t cublasStatus_t
#define GPUBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED CUBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED CUBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE CUBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH CUBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR CUBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED CUBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR CUBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR CUBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED CUBLAS_STATUS_NOT_SUPPORTED

/* ---- BLAS: handles, GEMM entry points, operand types ---- */
#define gpublasHandle_t cublasHandle_t
#define gpublasCreate cublasCreate
#define gpublasDestroy cublasDestroy
#define gpublasSetStream cublasSetStream
#define gpublasDgemm cublasDgemm
#define gpublasSgemm cublasSgemm
#define gpublasZgemm cublasZgemm
#define gpublasCgemm cublasCgemm
#define GPUBLAS_OP_N CUBLAS_OP_N
#define gpuDoubleComplex cuDoubleComplex

/* ---- Kernel index helpers (spelled as single identifiers so the same
 *      kernel source can be retargeted by the other back end) ---- */
#define threadIdx_x threadIdx.x
#define threadIdx_y threadIdx.y
#define blockIdx_x blockIdx.x
#define blockIdx_y blockIdx.y
#define blockDim_x blockDim.x
#define blockDim_y blockDim.y
#define gridDim_x gridDim.x
#define gridDim_y gridDim.y
102
103
104
105
106
#elif defined(HAVE_HIP)
107
108
#ifndef __HIP_PLATFORM_AMD__
109
#define __HIP_PLATFORM_AMD__
110
#endif
111
112
#include "hip/hip_runtime_api.h"
113
#include "hip/hip_runtime.h"
114
#include <hipblas/hipblas.h>
115
116
// #include "roctracer_ext.h" // need to pass the include dir directly to HIP_HIPCC_FLAGS
117
// // roctx header file
118
// #include <roctx.h>
119
120
/*
 * HIP back end: the same vendor-neutral gpu* / gpublas* / GPUBLAS_* names as
 * the CUDA branch, aliased to their HIP runtime or hipBLAS counterparts.
 * The aliases are object-like macros, so a name that is never used is never
 * checked by the compiler.
 */

/* ---- Device management and queries ---- */
#define gpuDeviceProp hipDeviceProp_t
#define gpuGetDeviceCount hipGetDeviceCount
#define gpuGetDeviceProperties hipGetDeviceProperties
#define gpuSetDevice hipSetDevice
#define gpuGetDevice hipGetDevice
#define gpuDeviceSynchronize hipDeviceSynchronize
#define gpuDeviceReset hipDeviceReset
#define gpuRuntimeGetVersion hipRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize

/* ---- Error reporting ---- */
#define gpuError_t hipError_t
#define gpuSuccess hipSuccess
#define gpuGetErrorString hipGetErrorString
#define gpuGetLastError hipGetLastError

/* ---- Memory allocation and release ----
 * Both gpuHostMalloc and gpuMallocHost resolve to hipHostMalloc here. */
#define gpuMalloc hipMalloc
#define gpuMallocManaged hipMallocManaged
#define gpuMemAttachGlobal hipMemAttachGlobal
#define gpuHostMalloc hipHostMalloc
#define gpuHostMallocDefault hipHostMallocDefault
#define gpuMallocHost hipHostMalloc
#define gpuFreeHost hipHostFree
#define gpuFree hipFree
#define gpuMemGetInfo hipMemGetInfo

/* ---- Memory transfer and initialization ---- */
#define gpuMemcpy hipMemcpy
#define gpuMemcpyAsync hipMemcpyAsync
#define gpuMemcpy2DAsync hipMemcpy2DAsync
/* NOTE(review): the original author was unsure about this mapping; confirm
 * that hipMemPrefetchAsync matches the CUDA call's behavior on the targeted
 * ROCm release before relying on it. */
#define gpuMemPrefetchAsync hipMemPrefetchAsync
#define gpuMemset hipMemset
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost

/* ---- Streams ---- */
#define gpuStream_t hipStream_t
#define gpuStreamCreate hipStreamCreate
#define gpuStreamCreateWithFlags hipStreamCreateWithFlags
#define gpuStreamDefault hipStreamDefault
#define gpuStreamSynchronize hipStreamSynchronize
#define gpuStreamDestroy hipStreamDestroy
/* NOTE(review): the HIP runtime does not appear to provide a function named
 * hipStreamDestroyWithFlags (streams are released with hipStreamDestroy).
 * The alias is kept for compatibility and only compiles while it is unused;
 * confirm and retire it. */
#define gpuStreamDestroyWithFlags hipStreamDestroyWithFlags

/* ---- Events ---- */
#define gpuEvent_t hipEvent_t
#define gpuEventCreate hipEventCreate
#define gpuEventRecord hipEventRecord
#define gpuEventElapsedTime hipEventElapsedTime

/* ---- BLAS: status codes ---- */
#define gpublasStatus_t hipblasStatus_t
#define GPUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED HIPBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED HIPBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE HIPBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH HIPBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR HIPBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED HIPBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR HIPBLAS_STATUS_INTERNAL_ERROR
/* NOTE(review): verify that the hipBLAS version in use actually declares a
 * HIPBLAS_STATUS_LICENSE_ERROR enumerator; if not, this alias only compiles
 * while it is unused. */
#define GPUBLAS_STATUS_LICENSE_ERROR HIPBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED HIPBLAS_STATUS_NOT_SUPPORTED

/* ---- BLAS: handles, GEMM entry points, operand types ---- */
#define gpublasHandle_t hipblasHandle_t
#define gpublasCreate hipblasCreate
#define gpublasDestroy hipblasDestroy
#define gpublasSetStream hipblasSetStream
#define gpublasDgemm hipblasDgemm
#define gpublasSgemm hipblasSgemm
#define gpublasZgemm hipblasZgemm
#define gpublasCgemm hipblasCgemm
#define GPUBLAS_OP_N HIPBLAS_OP_N
#define gpuDoubleComplex hipblasDoubleComplex

/* ---- Kernel index helpers (single-identifier spellings shared with the
 *      CUDA branch, mapped to HIP's hip*Idx_* / hip*Dim_* names) ---- */
#define threadIdx_x hipThreadIdx_x
#define threadIdx_y hipThreadIdx_y
#define blockIdx_x hipBlockIdx_x
#define blockIdx_y hipBlockIdx_y
#define blockDim_x hipBlockDim_x
#define blockDim_y hipBlockDim_y
#define gridDim_x hipGridDim_x
#define gridDim_y hipGridDim_y
190
191
192
#endif
193
194
195
/*
 * gpublasCheckErrors(fn)
 *
 * Evaluates the GPU BLAS call `fn` exactly once.  If the returned
 * gpublasStatus_t differs from GPUBLAS_STATUS_SUCCESS, prints the numeric
 * status together with the call site (__FILE__:__LINE__) to stderr and
 * terminates the process with exit(1).
 *
 * The body is wrapped in do { ... } while (0) WITHOUT a trailing semicolon,
 * so the expansion is a single statement and the caller supplies the `;`:
 *
 *     if (use_gpu)
 *         gpublasCheckErrors(gpublasCreate(&handle));
 *     else
 *         ...
 *
 * The argument is parenthesized so that any expression can be passed, and
 * the temporary avoids identifiers beginning with a double underscore, which
 * are reserved for the implementation.
 *
 * The expansion calls fprintf and exit, so <stdio.h> and <stdlib.h> (or
 * their C++ equivalents) must be visible wherever the macro is used.
 */
#define gpublasCheckErrors(fn)                                              \
    do {                                                                    \
        const gpublasStatus_t gpublas_status_ = (fn);                       \
        if (gpublas_status_ != GPUBLAS_STATUS_SUCCESS) {                    \
            fprintf(stderr, "Fatal gpublas error: %d (at %s:%d)\n",         \
                    (int)(gpublas_status_), __FILE__, __LINE__);            \
            fprintf(stderr, "*** FAILED - ABORTING\n");                     \
            exit(1);                                                        \
        }                                                                   \
    } while (0)
206
207
208
#endif
/* __SUPERLU_GPUWRAPPER */
/* File path: SRC/include/gpu_wrapper.h -- listing generated by documentation tool version 1.9.4 */