SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
gpu_wrapper.h
Go to the documentation of this file.
1// #ifndef __SUPERLU_GPUWRAPER /* allow multiple inclusions */
2// #define __SUPERLU_GPUWRAPER
3
4
5#ifdef HAVE_CUDA
6#include <cublas_v2.h>
7#include "cuda.h"
8#include "cuda_runtime_api.h"
9#include "cuda_runtime.h"
10#include <cusparse.h>
11#include <cuda_profiler_api.h>
12
/*
 * CUDA backend: every vendor-neutral gpu* / gpublas* / GPUBLAS_* name below
 * expands to the CUDA runtime or cuBLAS identifier of the same meaning.
 * The entries are plain object-like macros, so their order is irrelevant.
 */

/* Device selection, properties and global control. */
#define gpuDeviceProp                      cudaDeviceProp
#define gpuGetDeviceCount                  cudaGetDeviceCount
#define gpuGetDeviceProperties             cudaGetDeviceProperties
#define gpuSetDevice                       cudaSetDevice
#define gpuGetDevice                       cudaGetDevice
#define gpuDeviceSynchronize               cudaDeviceSynchronize
#define gpuDeviceReset                     cudaDeviceReset
#define gpuRuntimeGetVersion               cudaRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize  cudaOccupancyMaxPotentialBlockSize

/* Runtime error type, success value and message lookup. */
#define gpuError_t                         cudaError_t
#define gpuSuccess                         cudaSuccess
#define gpuGetErrorString                  cudaGetErrorString

/* Allocation and release. */
#define gpuMalloc                          cudaMalloc
#define gpuMallocHost                      cudaMallocHost
#define gpuHostMalloc                      cudaHostAlloc
#define gpuHostMallocDefault               cudaHostAllocDefault
#define gpuMallocManaged                   cudaMallocManaged
#define gpuMemAttachGlobal                 cudaMemAttachGlobal
#define gpuFree                            cudaFree
#define gpuFreeHost                        cudaFreeHost
#define gpuMemGetInfo                      cudaMemGetInfo

/* Copies, prefetch and fill. */
#define gpuMemcpy                          cudaMemcpy
#define gpuMemcpyAsync                     cudaMemcpyAsync
#define gpuMemcpy2DAsync                   cudaMemcpy2DAsync
#define gpuMemcpyHostToDevice              cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost              cudaMemcpyDeviceToHost
#define gpuMemPrefetchAsync                cudaMemPrefetchAsync
#define gpuMemset                          cudaMemset

/* Streams and events. */
#define gpuStream_t                        cudaStream_t
#define gpuStreamCreate                    cudaStreamCreate
#define gpuStreamCreateWithFlags           cudaStreamCreateWithFlags
#define gpuStreamDefault                   cudaStreamDefault
#define gpuStreamSynchronize               cudaStreamSynchronize
#define gpuEvent_t                         cudaEvent_t
#define gpuEventCreate                     cudaEventCreate
#define gpuEventRecord                     cudaEventRecord
#define gpuEventElapsedTime                cudaEventElapsedTime

/* BLAS handle, routines and scalar type (cuBLAS). */
#define gpublasStatus_t                    cublasStatus_t
#define gpublasHandle_t                    cublasHandle_t
#define gpublasCreate                      cublasCreate
#define gpublasDestroy                     cublasDestroy
#define gpublasSetStream                   cublasSetStream
#define gpublasSgemm                       cublasSgemm
#define gpublasDgemm                       cublasDgemm
#define gpublasCgemm                       cublasCgemm
#define gpublasZgemm                       cublasZgemm
#define GPUBLAS_OP_N                       CUBLAS_OP_N
#define gpuDoubleComplex                   cuDoubleComplex

/* BLAS status codes (cuBLAS). */
#define GPUBLAS_STATUS_SUCCESS             CUBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED     CUBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED        CUBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE       CUBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH       CUBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR       CUBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED    CUBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR      CUBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR       CUBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED       CUBLAS_STATUS_NOT_SUPPORTED

/* Kernel index helpers: flat names for the x/y fields of the built-ins. */
#define threadIdx_x                        threadIdx.x
#define threadIdx_y                        threadIdx.y
#define blockIdx_x                         blockIdx.x
#define blockIdx_y                         blockIdx.y
#define blockDim_x                         blockDim.x
#define blockDim_y                         blockDim.y
#define gridDim_x                          gridDim.x
#define gridDim_y                          gridDim.y
79
80
81
82
83#elif defined(HAVE_HIP)
84
85#include "hip/hip_runtime_api.h"
86#include "hip/hip_runtime.h"
87#include "hipblas.h"
88
89// #include "roctracer_ext.h" // need to pass the include dir directly to HIP_HIPCC_FLAGS
90// // roctx header file
91// #include <roctx.h>
92
/*
 * HIP backend: every vendor-neutral gpu* / gpublas* / GPUBLAS_* name below
 * expands to the HIP runtime or hipBLAS identifier of the same meaning.
 * The entries are plain object-like macros, so their order is irrelevant.
 */

/* Device selection, properties and global control. */
#define gpuDeviceProp                      hipDeviceProp_t
#define gpuGetDeviceCount                  hipGetDeviceCount
#define gpuGetDeviceProperties             hipGetDeviceProperties
#define gpuSetDevice                       hipSetDevice
#define gpuGetDevice                       hipGetDevice
#define gpuDeviceSynchronize               hipDeviceSynchronize
#define gpuDeviceReset                     hipDeviceReset
#define gpuRuntimeGetVersion               hipRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize  hipOccupancyMaxPotentialBlockSize

/* Runtime error type, success value and message lookup. */
#define gpuError_t                         hipError_t
#define gpuSuccess                         hipSuccess
#define gpuGetErrorString                  hipGetErrorString

/*
 * Allocation and release.
 * Note that gpuMallocHost and gpuHostMalloc both expand to hipHostMalloc here.
 */
#define gpuMalloc                          hipMalloc
#define gpuMallocHost                      hipHostMalloc
#define gpuHostMalloc                      hipHostMalloc
#define gpuHostMallocDefault               hipHostMallocDefault
#define gpuMallocManaged                   hipMallocManaged
#define gpuMemAttachGlobal                 hipMemAttachGlobal
#define gpuFree                            hipFree
#define gpuFreeHost                        hipHostFree
#define gpuMemGetInfo                      hipMemGetInfo

/* Copies, prefetch and fill. */
#define gpuMemcpy                          hipMemcpy
#define gpuMemcpyAsync                     hipMemcpyAsync
#define gpuMemcpy2DAsync                   hipMemcpy2DAsync
#define gpuMemcpyHostToDevice              hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost              hipMemcpyDeviceToHost
/* NOTE(review): the original author flagged this mapping as unverified
 * ("not sure about this") - confirm it matches the CUDA prefetch semantics. */
#define gpuMemPrefetchAsync                hipMemPrefetchAsync
#define gpuMemset                          hipMemset

/* Streams and events. */
#define gpuStream_t                        hipStream_t
#define gpuStreamCreate                    hipStreamCreate
#define gpuStreamCreateWithFlags           hipStreamCreateWithFlags
#define gpuStreamDefault                   hipStreamDefault
#define gpuStreamSynchronize               hipStreamSynchronize
#define gpuEvent_t                         hipEvent_t
#define gpuEventCreate                     hipEventCreate
#define gpuEventRecord                     hipEventRecord
#define gpuEventElapsedTime                hipEventElapsedTime

/* BLAS handle, routines and scalar type (hipBLAS). */
#define gpublasStatus_t                    hipblasStatus_t
#define gpublasHandle_t                    hipblasHandle_t
#define gpublasCreate                      hipblasCreate
#define gpublasDestroy                     hipblasDestroy
#define gpublasSetStream                   hipblasSetStream
#define gpublasSgemm                       hipblasSgemm
#define gpublasDgemm                       hipblasDgemm
#define gpublasCgemm                       hipblasCgemm
#define gpublasZgemm                       hipblasZgemm
#define GPUBLAS_OP_N                       HIPBLAS_OP_N
#define gpuDoubleComplex                   hipblasDoubleComplex

/* BLAS status codes (hipBLAS). */
#define GPUBLAS_STATUS_SUCCESS             HIPBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED     HIPBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED        HIPBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE       HIPBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH       HIPBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR       HIPBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED    HIPBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR      HIPBLAS_STATUS_INTERNAL_ERROR
/* NOTE(review): verify that the hipBLAS version in use declares
 * HIPBLAS_STATUS_LICENSE_ERROR; if it does not, any use of this alias
 * will fail to compile. */
#define GPUBLAS_STATUS_LICENSE_ERROR       HIPBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED       HIPBLAS_STATUS_NOT_SUPPORTED

/* Kernel index helpers: flat names mapped onto the hip*Idx / hip*Dim names. */
#define threadIdx_x                        hipThreadIdx_x
#define threadIdx_y                        hipThreadIdx_y
#define blockIdx_x                         hipBlockIdx_x
#define blockIdx_y                         hipBlockIdx_y
#define blockDim_x                         hipBlockDim_x
#define blockDim_y                         hipBlockDim_y
#define gridDim_x                          hipGridDim_x
#define gridDim_y                          hipGridDim_y
159
160
161#endif
162
163
/*
 * gpublasCheckErrors(fn)
 *
 * Evaluates the expression `fn` exactly once, stores the result in a
 * gpublasStatus_t, and compares it with GPUBLAS_STATUS_SUCCESS.  On any
 * other value it prints the numeric status together with the call site
 * (__FILE__, __LINE__) to stderr and terminates the process via exit(1).
 *
 * The body is wrapped in do { ... } while (0) WITHOUT a trailing semicolon,
 * so the macro behaves like a single statement: the caller writes
 * `gpublasCheckErrors(call);` and it is safe inside an unbraced if/else.
 * The argument is parenthesised so any expression can be passed, and the
 * local uses a non-reserved name (no leading double underscore).
 *
 * Uses fprintf/stderr and exit, so <stdio.h> and <stdlib.h> must be visible
 * wherever the macro is expanded.
 */
#define gpublasCheckErrors(fn)                                               \
    do {                                                                     \
        gpublasStatus_t gpublas_status_ = (fn);                              \
        if (gpublas_status_ != GPUBLAS_STATUS_SUCCESS) {                     \
            fprintf(stderr, "Fatal gpublas error: %d (at %s:%d)\n",          \
                    (int)(gpublas_status_),                                  \
                    __FILE__, __LINE__);                                     \
            fprintf(stderr, "*** FAILED - ABORTING\n");                      \
            exit(1);                                                         \
        }                                                                    \
    } while (0)