SuperLU Distributed 9.0.0
gpu3d
gpu_wrapper.h
Go to the documentation of this file.
1
21#ifndef __SUPERLU_GPUWRAPPER /* allow multiple inclusions */
22#define __SUPERLU_GPUWRAPPER
23
24#ifdef HAVE_CUDA
25#include <cublas_v2.h>
26#include "cuda.h"
27#include "cuda_runtime_api.h"
28#include "cuda_runtime.h"
29#include <cusparse.h>
30#include <cuda_profiler_api.h>
31
/*
 * Vendor-neutral gpu* / gpublas* names, CUDA flavour.
 *
 * Every entry is a plain object-like macro that renames a portable identifier
 * to its CUDA runtime or cuBLAS counterpart, so call sites are written once
 * and compile against either backend.
 */

/* ---- Device management ---- */
#define gpuDeviceProp                     cudaDeviceProp
#define gpuGetDeviceCount                 cudaGetDeviceCount
#define gpuGetDeviceProperties            cudaGetDeviceProperties
#define gpuSetDevice                      cudaSetDevice
#define gpuGetDevice                      cudaGetDevice
#define gpuDeviceSynchronize              cudaDeviceSynchronize
#define gpuDeviceReset                    cudaDeviceReset
#define gpuRuntimeGetVersion              cudaRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize cudaOccupancyMaxPotentialBlockSize

/* ---- Error reporting ---- */
#define gpuError_t                        cudaError_t
#define gpuSuccess                        cudaSuccess
#define gpuGetErrorString                 cudaGetErrorString
#define gpuGetLastError                   cudaGetLastError

/* ---- Memory allocation and release ---- */
#define gpuMalloc                         cudaMalloc
#define gpuMallocManaged                  cudaMallocManaged
#define gpuMemAttachGlobal                cudaMemAttachGlobal
#define gpuMallocHost                     cudaMallocHost
#define gpuHostMalloc                     cudaHostAlloc
#define gpuHostMallocDefault              cudaHostAllocDefault
#define gpuFree                           cudaFree
#define gpuFreeHost                       cudaFreeHost
#define gpuMemGetInfo                     cudaMemGetInfo

/* ---- Memory transfer and initialisation ---- */
#define gpuMemcpy                         cudaMemcpy
#define gpuMemcpyAsync                    cudaMemcpyAsync
#define gpuMemcpy2DAsync                  cudaMemcpy2DAsync
#define gpuMemcpyHostToDevice             cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToDevice           cudaMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost             cudaMemcpyDeviceToHost
#define gpuMemPrefetchAsync               cudaMemPrefetchAsync
#define gpuMemset                         cudaMemset

/* ---- Streams ---- */
#define gpuStream_t                       cudaStream_t
#define gpuStreamDefault                  cudaStreamDefault
#define gpuStreamCreate                   cudaStreamCreate
#define gpuStreamCreateWithFlags          cudaStreamCreateWithFlags
#define gpuStreamDestroy                  cudaStreamDestroy
/* The CUDA runtime has no cudaStreamDestroyWithFlags; a stream created with
 * flags is released with cudaStreamDestroy, so the alias points there. */
#define gpuStreamDestroyWithFlags         cudaStreamDestroy
#define gpuStreamSynchronize              cudaStreamSynchronize

/* ---- Events ---- */
#define gpuEvent_t                        cudaEvent_t
#define gpuEventCreate                    cudaEventCreate
#define gpuEventRecord                    cudaEventRecord
#define gpuEventElapsedTime               cudaEventElapsedTime

/* ---- BLAS: handle, status codes, operations, GEMM ---- */
#define gpublasHandle_t                   cublasHandle_t
#define gpublasStatus_t                   cublasStatus_t
#define gpublasCreate                     cublasCreate
#define gpublasDestroy                    cublasDestroy
#define gpublasSetStream                  cublasSetStream
#define GPUBLAS_STATUS_SUCCESS            CUBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED    CUBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED       CUBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE      CUBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH      CUBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR      CUBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED   CUBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR     CUBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR      CUBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED      CUBLAS_STATUS_NOT_SUPPORTED
#define GPUBLAS_OP_N                      CUBLAS_OP_N
#define gpublasSgemm                      cublasSgemm
#define gpublasDgemm                      cublasDgemm
#define gpublasCgemm                      cublasCgemm
#define gpublasZgemm                      cublasZgemm
#define gpuDoubleComplex                  cuDoubleComplex

/* ---- Kernel index built-ins (x and y components only) ---- */
#define threadIdx_x                       threadIdx.x
#define threadIdx_y                       threadIdx.y
#define blockIdx_x                        blockIdx.x
#define blockIdx_y                        blockIdx.y
#define blockDim_x                        blockDim.x
#define blockDim_y                        blockDim.y
#define gridDim_x                         gridDim.x
#define gridDim_y                         gridDim.y
102
103
104
105
106#elif defined(HAVE_HIP)
107
108#ifndef __HIP_PLATFORM_AMD__
109#define __HIP_PLATFORM_AMD__
110#endif
111
112#include "hip/hip_runtime_api.h"
113#include "hip/hip_runtime.h"
114#include <hipblas/hipblas.h>
115
116// #include "roctracer_ext.h" // need to pass the include dir directly to HIP_HIPCC_FLAGS
117// // roctx header file
118// #include <roctx.h>
119
/*
 * Vendor-neutral gpu* / gpublas* names, HIP flavour.
 *
 * Every entry is a plain object-like macro that renames a portable identifier
 * to its HIP runtime or hipBLAS counterpart, so call sites are written once
 * and compile against either backend.
 */

/* ---- Device management ---- */
#define gpuDeviceProp                     hipDeviceProp_t
#define gpuGetDeviceCount                 hipGetDeviceCount
#define gpuGetDeviceProperties            hipGetDeviceProperties
#define gpuSetDevice                      hipSetDevice
#define gpuGetDevice                      hipGetDevice
#define gpuDeviceSynchronize              hipDeviceSynchronize
#define gpuDeviceReset                    hipDeviceReset
#define gpuRuntimeGetVersion              hipRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize

/* ---- Error reporting ---- */
#define gpuError_t                        hipError_t
#define gpuSuccess                        hipSuccess
#define gpuGetErrorString                 hipGetErrorString
#define gpuGetLastError                   hipGetLastError

/* ---- Memory allocation and release ---- */
#define gpuMalloc                         hipMalloc
#define gpuMallocManaged                  hipMallocManaged
#define gpuMemAttachGlobal                hipMemAttachGlobal
/* Both pinned-host allocation aliases resolve to hipHostMalloc. */
#define gpuMallocHost                     hipHostMalloc
#define gpuHostMalloc                     hipHostMalloc
#define gpuHostMallocDefault              hipHostMallocDefault
#define gpuFree                           hipFree
#define gpuFreeHost                       hipHostFree
#define gpuMemGetInfo                     hipMemGetInfo

/* ---- Memory transfer and initialisation ---- */
#define gpuMemcpy                         hipMemcpy
#define gpuMemcpyAsync                    hipMemcpyAsync
#define gpuMemcpy2DAsync                  hipMemcpy2DAsync
#define gpuMemcpyHostToDevice             hipMemcpyHostToDevice
#define gpuMemcpyDeviceToDevice           hipMemcpyDeviceToDevice
#define gpuMemcpyDeviceToHost             hipMemcpyDeviceToHost
/* NOTE(review): the original author was unsure about this mapping; confirm
 * that the targeted ROCm release provides hipMemPrefetchAsync. */
#define gpuMemPrefetchAsync               hipMemPrefetchAsync
#define gpuMemset                         hipMemset

/* ---- Streams ---- */
#define gpuStream_t                       hipStream_t
#define gpuStreamDefault                  hipStreamDefault
#define gpuStreamCreate                   hipStreamCreate
#define gpuStreamCreateWithFlags          hipStreamCreateWithFlags
#define gpuStreamDestroy                  hipStreamDestroy
/* HIP has no hipStreamDestroyWithFlags; a stream created with flags is
 * released with hipStreamDestroy, so the alias points there. */
#define gpuStreamDestroyWithFlags         hipStreamDestroy
#define gpuStreamSynchronize              hipStreamSynchronize

/* ---- Events ---- */
#define gpuEvent_t                        hipEvent_t
#define gpuEventCreate                    hipEventCreate
#define gpuEventRecord                    hipEventRecord
#define gpuEventElapsedTime               hipEventElapsedTime

/* ---- BLAS: handle, status codes, operations, GEMM ---- */
#define gpublasHandle_t                   hipblasHandle_t
#define gpublasStatus_t                   hipblasStatus_t
#define gpublasCreate                     hipblasCreate
#define gpublasDestroy                    hipblasDestroy
#define gpublasSetStream                  hipblasSetStream
#define GPUBLAS_STATUS_SUCCESS            HIPBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED    HIPBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED       HIPBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE      HIPBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH      HIPBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR      HIPBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED   HIPBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR     HIPBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR      HIPBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED      HIPBLAS_STATUS_NOT_SUPPORTED
#define GPUBLAS_OP_N                      HIPBLAS_OP_N
#define gpublasSgemm                      hipblasSgemm
#define gpublasDgemm                      hipblasDgemm
#define gpublasCgemm                      hipblasCgemm
#define gpublasZgemm                      hipblasZgemm
#define gpuDoubleComplex                  hipblasDoubleComplex

/* ---- Kernel index built-ins (x and y components only) ---- */
#define threadIdx_x                       hipThreadIdx_x
#define threadIdx_y                       hipThreadIdx_y
#define blockIdx_x                        hipBlockIdx_x
#define blockIdx_y                        hipBlockIdx_y
#define blockDim_x                        hipBlockDim_x
#define blockDim_y                        hipBlockDim_y
#define gridDim_x                         hipGridDim_x
#define gridDim_y                         hipGridDim_y
190
191
192#endif
193
194
/*
 * gpublasCheckErrors(fn)
 *
 * Evaluates the gpuBLAS call `fn` exactly once. If the returned status is not
 * GPUBLAS_STATUS_SUCCESS, prints the numeric status together with the source
 * file and line of the call site to stderr, then terminates via exit(1).
 *
 * The body is wrapped in do { ... } while (0) WITHOUT a trailing semicolon so
 * the macro behaves as a single statement: the caller supplies the `;`, and
 * the macro can sit in an unbraced if/else branch.
 *
 * The including translation unit must provide fprintf/stderr (<stdio.h>) and
 * exit (<stdlib.h>).
 */
#define gpublasCheckErrors(fn) \
    do { \
        /* Parenthesised so any expression can be passed; the local name \
           avoids the reserved leading double underscore. */ \
        gpublasStatus_t gpublas_status_ = (fn); \
        if (gpublas_status_ != GPUBLAS_STATUS_SUCCESS) { \
            fprintf(stderr, "Fatal gpublas error: %d (at %s:%d)\n", \
                    (int)(gpublas_status_), \
                    __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)
206
207
208#endif /* __SUPERLU_GPUWRAPPER */