7#ifndef OFFLOAD_RUNTIME_H
8#define OFFLOAD_RUNTIME_H
10#if !defined(__LIBXSTREAM)
11#undef __OFFLOAD_OPENCL
12#elif !defined(__OFFLOAD_OPENCL)
13#define __OFFLOAD_OPENCL
16#if defined(__OFFLOAD_OPENCL)
17#if !defined(__NO_OFFLOAD_GRID)
18#define __NO_OFFLOAD_GRID
20#if !defined(__NO_OFFLOAD_PW)
21#define __NO_OFFLOAD_PW
25#if defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP) || \
26 defined(__OFFLOAD_OPENCL)
31#if !defined(__OFFLOAD)
35#if defined(__OFFLOAD_CUDA)
36#include <cuda_runtime.h>
37#elif defined(__OFFLOAD_HIP)
38#include <hip/hip_runtime.h>
39#include <hip/hip_version.h>
40#elif defined(__OFFLOAD_OPENCL)
42#include <libxstream/libxstream_cp2k.h>
49#if defined(__OFFLOAD_CUDA)
50typedef cudaStream_t offloadStream_t;
51typedef cudaEvent_t offloadEvent_t;
52typedef cudaError_t offloadError_t;
53#define offloadSuccess cudaSuccess
54#elif defined(__OFFLOAD_HIP)
55typedef hipStream_t offloadStream_t;
56typedef hipEvent_t offloadEvent_t;
57typedef hipError_t offloadError_t;
58#define offloadSuccess hipSuccess
65#if !defined(OFFLOAD_CHECK)
66#define OFFLOAD_CHECK(CMD) \
68 const offloadError_t error = (CMD); \
69 if (error != offloadSuccess) { \
70 const char *const name = offloadGetErrorName(error); \
71 if (NULL != name && '\0' != *name) { \
72 fprintf(stderr, "ERROR: \"%s\" at %s:%i\n", name, __FILE__, __LINE__); \
74 fprintf(stderr, "ERROR %i: %s:%i\n", (int)error, __FILE__, __LINE__); \
81#if !defined(__OFFLOAD_OPENCL)
85static inline const char *offloadGetErrorName(offloadError_t error) {
86#if defined(__OFFLOAD_CUDA)
87 return cudaGetErrorName(error);
88#elif defined(__OFFLOAD_HIP)
89 return hipGetErrorName(error);
96static inline offloadError_t offloadGetLastError(
void) {
97#if defined(__OFFLOAD_CUDA)
98 return cudaGetLastError();
99#elif defined(__OFFLOAD_HIP)
100 return hipGetLastError();
107static inline void offloadMemsetAsync(
void *ptr,
const int val,
109 offloadStream_t stream) {
110#if defined(__OFFLOAD_CUDA)
111 OFFLOAD_CHECK(cudaMemsetAsync(ptr, val, size, stream));
112#elif defined(__OFFLOAD_HIP)
113 OFFLOAD_CHECK(hipMemsetAsync(ptr, val, size, stream));
120static inline void offloadMemset(
void *ptr,
const int val,
size_t size) {
121#if defined(__OFFLOAD_CUDA)
122 OFFLOAD_CHECK(cudaMemset(ptr, val, size));
123#elif defined(__OFFLOAD_HIP)
124 OFFLOAD_CHECK(hipMemset(ptr, val, size));
131static inline void offloadMemcpyAsyncHtoD(
void *ptr_dev,
const void *ptr_hst,
133 offloadStream_t stream) {
134#if defined(__OFFLOAD_UNIFIED_MEMORY)
135 if (ptr_dev == ptr_hst) {
139#if defined(__OFFLOAD_CUDA)
141 cudaMemcpyAsync(ptr_dev, ptr_hst, size, cudaMemcpyHostToDevice, stream));
142#elif defined(__OFFLOAD_HIP)
144 hipMemcpyAsync(ptr_dev, ptr_hst, size, hipMemcpyHostToDevice, stream));
151static inline void offloadMemcpyAsyncDtoH(
void *ptr_hst,
const void *ptr_dev,
153 const offloadStream_t stream) {
154#if defined(__OFFLOAD_UNIFIED_MEMORY)
155 if (ptr_hst == ptr_dev) {
159#if defined(__OFFLOAD_CUDA)
161 cudaMemcpyAsync(ptr_hst, ptr_dev, size, cudaMemcpyDeviceToHost, stream));
162#elif defined(__OFFLOAD_HIP)
164 hipMemcpyAsync(ptr_hst, ptr_dev, size, hipMemcpyDeviceToHost, stream));
171static inline void offloadMemcpyAsyncDtoD(
void *dst,
const void *src,
173 const offloadStream_t stream) {
174#if defined(__OFFLOAD_CUDA)
176 cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, stream));
177#elif defined(__OFFLOAD_HIP)
179 hipMemcpyAsync(dst, src, size, hipMemcpyDeviceToDevice, stream));
186static inline void offloadMemcpyHtoD(
void *ptr_dev,
const void *ptr_hst,
188#if defined(__OFFLOAD_UNIFIED_MEMORY)
189 if (ptr_dev == ptr_hst) {
193#if defined(__OFFLOAD_CUDA)
194 OFFLOAD_CHECK(cudaMemcpy(ptr_dev, ptr_hst, size, cudaMemcpyHostToDevice));
195#elif defined(__OFFLOAD_HIP)
196 OFFLOAD_CHECK(hipMemcpy(ptr_dev, ptr_hst, size, hipMemcpyHostToDevice));
203static inline void offloadMemcpyDtoH(
void *ptr_dev,
const void *ptr_hst,
205#if defined(__OFFLOAD_UNIFIED_MEMORY)
206 if (ptr_dev == ptr_hst) {
210#if defined(__OFFLOAD_CUDA)
211 OFFLOAD_CHECK(cudaMemcpy(ptr_dev, ptr_hst, size, cudaMemcpyDeviceToHost));
212#elif defined(__OFFLOAD_HIP)
213 OFFLOAD_CHECK(hipMemcpy(ptr_dev, ptr_hst, size, hipMemcpyDeviceToHost));
220static inline void offloadMemcpyToSymbol(
const void *symbol,
const void *src,
221 const size_t count) {
222#if defined(__OFFLOAD_CUDA)
224 cudaMemcpyToSymbol(symbol, src, count, 0, cudaMemcpyHostToDevice));
225#elif defined(__OFFLOAD_HIP)
227 hipMemcpyToSymbol(symbol, src, count, 0, hipMemcpyHostToDevice));
234static inline void offloadEventCreate(offloadEvent_t *event) {
235#if defined(__OFFLOAD_CUDA)
236 OFFLOAD_CHECK(cudaEventCreate(event));
237#elif defined(__OFFLOAD_HIP)
238 OFFLOAD_CHECK(hipEventCreate(event));
245static inline void offloadEventDestroy(offloadEvent_t event) {
246#if defined(__OFFLOAD_CUDA)
247 OFFLOAD_CHECK(cudaEventDestroy(event));
248#elif defined(__OFFLOAD_HIP)
249 OFFLOAD_CHECK(hipEventDestroy(event));
256static inline void offloadStreamCreate(offloadStream_t *stream) {
257#if defined(__OFFLOAD_CUDA)
258 OFFLOAD_CHECK(cudaStreamCreate(stream));
259#elif defined(__OFFLOAD_HIP)
260 OFFLOAD_CHECK(hipStreamCreate(stream));
267static inline void offloadStreamDestroy(offloadStream_t stream) {
268#if defined(__OFFLOAD_CUDA)
269 OFFLOAD_CHECK(cudaStreamDestroy(stream));
270#elif defined(__OFFLOAD_HIP)
271 OFFLOAD_CHECK(hipStreamDestroy(stream));
278static inline void offloadEventSynchronize(offloadEvent_t event) {
279#if defined(__OFFLOAD_CUDA)
280 OFFLOAD_CHECK(cudaEventSynchronize(event));
281#elif defined(__OFFLOAD_HIP)
282 OFFLOAD_CHECK(hipEventSynchronize(event));
289static inline void offloadStreamSynchronize(offloadStream_t stream) {
290#if defined(__OFFLOAD_CUDA)
291 OFFLOAD_CHECK(cudaStreamSynchronize(stream));
292#elif defined(__OFFLOAD_HIP)
293 OFFLOAD_CHECK(hipStreamSynchronize(stream));
300static inline void offloadEventRecord(offloadEvent_t event,
301 offloadStream_t stream) {
302#if defined(__OFFLOAD_CUDA)
303 OFFLOAD_CHECK(cudaEventRecord(event, stream));
304#elif defined(__OFFLOAD_HIP)
305 OFFLOAD_CHECK(hipEventRecord(event, stream));
312static inline void offloadMallocHost(
void **ptr,
size_t size) {
313#if defined(__OFFLOAD_CUDA)
314 OFFLOAD_CHECK(cudaMallocHost(ptr, size));
315#elif defined(__OFFLOAD_HIP)
316 OFFLOAD_CHECK(hipHostMalloc(ptr, size, hipHostMallocDefault));
326static inline void offloadMalloc(
void **ptr,
size_t size) {
327#if defined(__OFFLOAD_CUDA)
328 OFFLOAD_CHECK(cudaMalloc(ptr, size));
329#elif defined(__OFFLOAD_HIP)
330 OFFLOAD_CHECK(hipMalloc(ptr, size));
340static inline void offloadFree(
void *ptr) {
341#if defined(__OFFLOAD_CUDA)
342 OFFLOAD_CHECK(cudaFree(ptr));
343#elif defined(__OFFLOAD_HIP)
344 OFFLOAD_CHECK(hipFree(ptr));
353static inline void offloadFreeHost(
void *ptr) {
354#if defined(__OFFLOAD_CUDA)
355 OFFLOAD_CHECK(cudaFreeHost(ptr));
356#elif defined(__OFFLOAD_HIP)
357 OFFLOAD_CHECK(hipHostFree(ptr));
366static inline void offloadStreamWaitEvent(offloadStream_t stream,
367 offloadEvent_t event) {
368#if defined(__OFFLOAD_CUDA)
369 OFFLOAD_CHECK(cudaStreamWaitEvent(stream, event, 0 ));
370#elif defined(__OFFLOAD_HIP)
371 OFFLOAD_CHECK(hipStreamWaitEvent(stream, event, 0 ));
378static inline bool offloadEventQuery(offloadEvent_t event) {
379#if defined(__OFFLOAD_CUDA)
380 return offloadSuccess == cudaEventQuery(event);
381#elif defined(__OFFLOAD_HIP)
382 return offloadSuccess == hipEventQuery(event);
389static inline void offloadDeviceSynchronize(
void) {
390#if defined(__OFFLOAD_CUDA)
391 OFFLOAD_CHECK(cudaDeviceSynchronize());
392#elif defined(__OFFLOAD_HIP)
393 OFFLOAD_CHECK(hipDeviceSynchronize());
400static inline void offloadEnsureMallocHeapSize(
const size_t required_size) {
401#if defined(__OFFLOAD_CUDA)
403 OFFLOAD_CHECK(cudaDeviceGetLimit(¤t_size, cudaLimitMallocHeapSize));
404 if (current_size < required_size) {
405 OFFLOAD_CHECK(cudaDeviceSetLimit(cudaLimitMallocHeapSize, required_size));
407#elif defined(__OFFLOAD_HIP) && (HIP_VERSION >= 50300000)
409 OFFLOAD_CHECK(hipDeviceGetLimit(¤t_size, hipLimitMallocHeapSize));
410 if (current_size < required_size) {
411 OFFLOAD_CHECK(hipDeviceSetLimit(hipLimitMallocHeapSize, required_size));