SuperLU Distributed 8.2.1
Distributed memory sparse direct solver
gpu_wrapper.h
Go to the documentation of this file.
1
21#ifndef __SUPERLU_GPUWRAPPER /* allow multiple inclusions */
22#define __SUPERLU_GPUWRAPPER
23
24#ifdef HAVE_CUDA
25#include <cublas_v2.h>
26#include "cuda.h"
27#include "cuda_runtime_api.h"
28#include "cuda_runtime.h"
29#include <cusparse.h>
30#include <cuda_profiler_api.h>
31
/*
 * CUDA back end: every vendor-neutral gpu* / gpublas* / GPUBLAS_* name used by
 * the library is defined here as a plain textual alias of the corresponding
 * CUDA runtime or cuBLAS identifier. The aliases are object-like macros, so
 * they add no wrapper code and no extra argument checking.
 */

/* ---- Error reporting ---- */
#define gpuError_t                        cudaError_t
#define gpuSuccess                        cudaSuccess
#define gpuGetErrorString                 cudaGetErrorString

/* ---- Device and runtime queries / control ---- */
#define gpuDeviceProp                     cudaDeviceProp
#define gpuGetDeviceCount                 cudaGetDeviceCount
#define gpuGetDeviceProperties            cudaGetDeviceProperties
#define gpuSetDevice                      cudaSetDevice
#define gpuGetDevice                      cudaGetDevice
#define gpuDeviceSynchronize              cudaDeviceSynchronize
#define gpuDeviceReset                    cudaDeviceReset
#define gpuRuntimeGetVersion              cudaRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize cudaOccupancyMaxPotentialBlockSize

/* ---- Memory allocation, release, fill and copies ---- */
#define gpuMalloc                         cudaMalloc
#define gpuMallocManaged                  cudaMallocManaged
#define gpuMallocHost                     cudaMallocHost
#define gpuHostMalloc                     cudaHostAlloc
#define gpuHostMallocDefault              cudaHostAllocDefault
#define gpuMemAttachGlobal                cudaMemAttachGlobal
#define gpuFree                           cudaFree
#define gpuFreeHost                       cudaFreeHost
#define gpuMemset                         cudaMemset
#define gpuMemGetInfo                     cudaMemGetInfo
#define gpuMemcpy                         cudaMemcpy
#define gpuMemcpyAsync                    cudaMemcpyAsync
#define gpuMemcpy2DAsync                  cudaMemcpy2DAsync
#define gpuMemPrefetchAsync               cudaMemPrefetchAsync
#define gpuMemcpyHostToDevice             cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost             cudaMemcpyDeviceToHost

/* ---- Streams ---- */
#define gpuStream_t                       cudaStream_t
#define gpuStreamCreate                   cudaStreamCreate
#define gpuStreamCreateWithFlags          cudaStreamCreateWithFlags
#define gpuStreamDefault                  cudaStreamDefault
#define gpuStreamDestroy                  cudaStreamDestroy
/*
 * The CUDA runtime has no "destroy with flags" entry point; a stream made by
 * cudaStreamCreateWithFlags is released with cudaStreamDestroy. The alias is
 * kept for source compatibility and now resolves to a function that exists
 * (it previously expanded to the undeclared name cudaStreamDestroyWithFlags).
 */
#define gpuStreamDestroyWithFlags         cudaStreamDestroy
#define gpuStreamSynchronize              cudaStreamSynchronize

/* ---- Events ---- */
#define gpuEvent_t                        cudaEvent_t
#define gpuEventCreate                    cudaEventCreate
#define gpuEventRecord                    cudaEventRecord
#define gpuEventElapsedTime               cudaEventElapsedTime

/* ---- BLAS (cuBLAS) handles, routines and types ---- */
#define gpublasStatus_t                   cublasStatus_t
#define gpublasHandle_t                   cublasHandle_t
#define gpublasCreate                     cublasCreate
#define gpublasDestroy                    cublasDestroy
#define gpublasSetStream                  cublasSetStream
#define gpublasDgemm                      cublasDgemm
#define gpublasSgemm                      cublasSgemm
#define gpublasZgemm                      cublasZgemm
#define gpublasCgemm                      cublasCgemm
#define GPUBLAS_OP_N                      CUBLAS_OP_N
#define gpuDoubleComplex                  cuDoubleComplex

/* ---- BLAS status codes (compared against by gpublasCheckErrors) ---- */
#define GPUBLAS_STATUS_SUCCESS            CUBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED    CUBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED       CUBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE      CUBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH      CUBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR      CUBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED   CUBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR     CUBLAS_STATUS_INTERNAL_ERROR
#define GPUBLAS_STATUS_LICENSE_ERROR      CUBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED      CUBLAS_STATUS_NOT_SUPPORTED

/*
 * ---- Kernel index helpers ----
 * Flat spellings of the built-in index/dimension members so that kernel
 * source can use one name under both back ends.
 */
#define threadIdx_x                       threadIdx.x
#define threadIdx_y                       threadIdx.y
#define blockIdx_x                        blockIdx.x
#define blockIdx_y                        blockIdx.y
#define blockDim_x                        blockDim.x
#define blockDim_y                        blockDim.y
#define gridDim_x                         gridDim.x
#define gridDim_y                         gridDim.y
100
101
102
103
104#elif defined(HAVE_HIP)
105
106#ifndef __HIP_PLATFORM_AMD__
107#define __HIP_PLATFORM_AMD__
108#endif
109
110#include "hip/hip_runtime_api.h"
111#include "hip/hip_runtime.h"
112#include <hipblas/hipblas.h>
113
114// #include "roctracer_ext.h" // need to pass the include dir directly to HIP_HIPCC_FLAGS
115// // roctx header file
116// #include <roctx.h>
117
/*
 * HIP back end: every vendor-neutral gpu* / gpublas* / GPUBLAS_* name used by
 * the library is defined here as a plain textual alias of the corresponding
 * HIP runtime or hipBLAS identifier. The aliases are object-like macros, so
 * they add no wrapper code and no extra argument checking.
 */

/* ---- Error reporting ---- */
#define gpuError_t                        hipError_t
#define gpuSuccess                        hipSuccess
#define gpuGetErrorString                 hipGetErrorString

/* ---- Device and runtime queries / control ---- */
#define gpuDeviceProp                     hipDeviceProp_t
#define gpuGetDeviceCount                 hipGetDeviceCount
#define gpuGetDeviceProperties            hipGetDeviceProperties
#define gpuSetDevice                      hipSetDevice
#define gpuGetDevice                      hipGetDevice
#define gpuDeviceSynchronize              hipDeviceSynchronize
#define gpuDeviceReset                    hipDeviceReset
#define gpuRuntimeGetVersion              hipRuntimeGetVersion
#define gpuOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize

/* ---- Memory allocation, release, fill and copies ---- */
#define gpuMalloc                         hipMalloc
#define gpuMallocManaged                  hipMallocManaged
/* Both host-allocation spellings map to hipHostMalloc under HIP. */
#define gpuMallocHost                     hipHostMalloc
#define gpuHostMalloc                     hipHostMalloc
#define gpuHostMallocDefault              hipHostMallocDefault
#define gpuMemAttachGlobal                hipMemAttachGlobal
#define gpuFree                           hipFree
#define gpuFreeHost                       hipHostFree
#define gpuMemset                         hipMemset
#define gpuMemGetInfo                     hipMemGetInfo
#define gpuMemcpy                         hipMemcpy
#define gpuMemcpyAsync                    hipMemcpyAsync
#define gpuMemcpy2DAsync                  hipMemcpy2DAsync
#define gpuMemPrefetchAsync               hipMemPrefetchAsync /* not sure about this */
#define gpuMemcpyHostToDevice             hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost             hipMemcpyDeviceToHost

/* ---- Streams ---- */
#define gpuStream_t                       hipStream_t
#define gpuStreamCreate                   hipStreamCreate
#define gpuStreamCreateWithFlags          hipStreamCreateWithFlags
#define gpuStreamDefault                  hipStreamDefault
#define gpuStreamDestroy                  hipStreamDestroy
/*
 * The HIP runtime has no "destroy with flags" entry point; a stream made by
 * hipStreamCreateWithFlags is released with hipStreamDestroy. The alias is
 * kept for source compatibility and now resolves to a function that exists
 * (it previously expanded to the undeclared name hipStreamDestroyWithFlags).
 */
#define gpuStreamDestroyWithFlags         hipStreamDestroy
#define gpuStreamSynchronize              hipStreamSynchronize

/* ---- Events ---- */
#define gpuEvent_t                        hipEvent_t
#define gpuEventCreate                    hipEventCreate
#define gpuEventRecord                    hipEventRecord
#define gpuEventElapsedTime               hipEventElapsedTime

/* ---- BLAS (hipBLAS) handles, routines and types ---- */
#define gpublasStatus_t                   hipblasStatus_t
#define gpublasHandle_t                   hipblasHandle_t
#define gpublasCreate                     hipblasCreate
#define gpublasDestroy                    hipblasDestroy
#define gpublasSetStream                  hipblasSetStream
#define gpublasDgemm                      hipblasDgemm
#define gpublasSgemm                      hipblasSgemm
#define gpublasZgemm                      hipblasZgemm
#define gpublasCgemm                      hipblasCgemm
#define GPUBLAS_OP_N                      HIPBLAS_OP_N
#define gpuDoubleComplex                  hipblasDoubleComplex

/* ---- BLAS status codes (compared against by gpublasCheckErrors) ---- */
#define GPUBLAS_STATUS_SUCCESS            HIPBLAS_STATUS_SUCCESS
#define GPUBLAS_STATUS_NOT_INITIALIZED    HIPBLAS_STATUS_NOT_INITIALIZED
#define GPUBLAS_STATUS_ALLOC_FAILED       HIPBLAS_STATUS_ALLOC_FAILED
#define GPUBLAS_STATUS_INVALID_VALUE      HIPBLAS_STATUS_INVALID_VALUE
#define GPUBLAS_STATUS_ARCH_MISMATCH      HIPBLAS_STATUS_ARCH_MISMATCH
#define GPUBLAS_STATUS_MAPPING_ERROR      HIPBLAS_STATUS_MAPPING_ERROR
#define GPUBLAS_STATUS_EXECUTION_FAILED   HIPBLAS_STATUS_EXECUTION_FAILED
#define GPUBLAS_STATUS_INTERNAL_ERROR     HIPBLAS_STATUS_INTERNAL_ERROR
/* NOTE(review): confirm that the hipBLAS version in use declares
 * HIPBLAS_STATUS_LICENSE_ERROR; if it does not, this alias only compiles
 * as long as nothing references it. */
#define GPUBLAS_STATUS_LICENSE_ERROR      HIPBLAS_STATUS_LICENSE_ERROR
#define GPUBLAS_STATUS_NOT_SUPPORTED      HIPBLAS_STATUS_NOT_SUPPORTED

/*
 * ---- Kernel index helpers ----
 * Flat spellings of the thread/block index and dimension values so that
 * kernel source can use one name under both back ends.
 */
#define threadIdx_x                       hipThreadIdx_x
#define threadIdx_y                       hipThreadIdx_y
#define blockIdx_x                        hipBlockIdx_x
#define blockIdx_y                        hipBlockIdx_y
#define blockDim_x                        hipBlockDim_x
#define blockDim_y                        hipBlockDim_y
#define gridDim_x                         hipGridDim_x
#define gridDim_y                         hipGridDim_y
186
187
188#endif
189
190
/* The macro below expands to fprintf()/exit(); pull in their declarations so
 * the header does not depend on what the including file happened to include. */
#include <stdio.h>
#include <stdlib.h>

/*
 * gpublasCheckErrors(fn)
 *
 * Evaluates the expression `fn` exactly once and stores its value in a
 * gpublasStatus_t. If that value differs from GPUBLAS_STATUS_SUCCESS, the
 * numeric status code and the file/line of the macro invocation are written
 * to stderr, followed by "*** FAILED - ABORTING", and the process terminates
 * via exit(1). On success the macro has no other effect.
 *
 * The body is wrapped in do { ... } while (0) WITHOUT a trailing semicolon so
 * that the invocation behaves like a single statement: write
 *     gpublasCheckErrors(call(...));
 * and it composes correctly with un-braced if/else.
 *
 * The temporary uses a name without leading underscores because identifiers
 * containing a double underscore are reserved for the implementation.
 */
#define gpublasCheckErrors(fn)                                              \
    do {                                                                    \
        gpublasStatus_t gpublas_status_ = (fn);                             \
        if (gpublas_status_ != GPUBLAS_STATUS_SUCCESS) {                    \
            fprintf(stderr, "Fatal gpublas error: %d (at %s:%d)\n",         \
                    (int)(gpublas_status_),                                 \
                    __FILE__, __LINE__);                                    \
            fprintf(stderr, "*** FAILED - ABORTING\n");                     \
            exit(1);                                                        \
        }                                                                   \
    } while (0)
202
203
204#endif /* __SUPERLU_GPUWRAPPER */