(git:871dbd5)
Loading...
Searching...
No Matches
offload_runtime.h
Go to the documentation of this file.
1/*----------------------------------------------------------------------------*/
2/* CP2K: A general program to perform molecular dynamics simulations */
3/* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4/* */
5/* SPDX-License-Identifier: BSD-3-Clause */
6/*----------------------------------------------------------------------------*/
7#ifndef OFFLOAD_RUNTIME_H
8#define OFFLOAD_RUNTIME_H
9
10#if !defined(__LIBXSTREAM)
11#undef __OFFLOAD_OPENCL
12#elif !defined(__OFFLOAD_OPENCL)
13#define __OFFLOAD_OPENCL
14#endif
15/* TODO: implement support or missing features */
16#if defined(__OFFLOAD_OPENCL)
17#if !defined(__NO_OFFLOAD_GRID)
18#define __NO_OFFLOAD_GRID
19#endif
20#if !defined(__NO_OFFLOAD_PW)
21#define __NO_OFFLOAD_PW
22#endif
23#endif
24
25#if defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP) || \
26 defined(__OFFLOAD_OPENCL)
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30
31#if !defined(__OFFLOAD)
32#define __OFFLOAD
33#endif
34
35#if defined(__OFFLOAD_CUDA)
36#include <cuda_runtime.h>
37#elif defined(__OFFLOAD_HIP)
38#include <hip/hip_runtime.h>
39#include <hip/hip_version.h>
40#elif defined(__OFFLOAD_OPENCL)
41/* Types, macros, and functions provided by libxstream_cp2k.h. */
42#include <libxstream/libxstream_cp2k.h>
43#endif
44
45#ifdef __cplusplus
46extern "C" {
47#endif
48
49#if defined(__OFFLOAD_CUDA)
50typedef cudaStream_t offloadStream_t;
51typedef cudaEvent_t offloadEvent_t;
52typedef cudaError_t offloadError_t;
53#define offloadSuccess cudaSuccess
54#elif defined(__OFFLOAD_HIP)
55typedef hipStream_t offloadStream_t;
56typedef hipEvent_t offloadEvent_t;
57typedef hipError_t offloadError_t;
58#define offloadSuccess hipSuccess
59#endif
60
61/*******************************************************************************
62 * \brief Check given Cuda status and upon failure abort with a nice message.
63 * \author Ole Schuett
64 ******************************************************************************/
65#if !defined(OFFLOAD_CHECK)
66#define OFFLOAD_CHECK(CMD) \
67 do { \
68 const offloadError_t error = (CMD); \
69 if (error != offloadSuccess) { \
70 const char *const name = offloadGetErrorName(error); \
71 if (NULL != name && '\0' != *name) { \
72 fprintf(stderr, "ERROR: \"%s\" at %s:%i\n", name, __FILE__, __LINE__); \
73 } else { \
74 fprintf(stderr, "ERROR %i: %s:%i\n", (int)error, __FILE__, __LINE__); \
75 } \
76 abort(); \
77 } \
78 } while (0)
79#endif
80
81#if !defined(__OFFLOAD_OPENCL)
82/*******************************************************************************
83 * \brief Wrapper around cudaGetErrorName.
84 ******************************************************************************/
85static inline const char *offloadGetErrorName(offloadError_t error) {
86#if defined(__OFFLOAD_CUDA)
87 return cudaGetErrorName(error);
88#elif defined(__OFFLOAD_HIP)
89 return hipGetErrorName(error);
90#endif
91}
92
93/*******************************************************************************
94 * \brief Wrapper around cudaGetLastError.
95 ******************************************************************************/
96static inline offloadError_t offloadGetLastError(void) {
97#if defined(__OFFLOAD_CUDA)
98 return cudaGetLastError();
99#elif defined(__OFFLOAD_HIP)
100 return hipGetLastError();
101#endif
102}
103
104/*******************************************************************************
105 * \brief Wrapper around cudaMemsetAsync.
106 ******************************************************************************/
107static inline void offloadMemsetAsync(void *ptr, const int val,
108 const size_t size,
109 offloadStream_t stream) {
110#if defined(__OFFLOAD_CUDA)
111 OFFLOAD_CHECK(cudaMemsetAsync(ptr, val, size, stream));
112#elif defined(__OFFLOAD_HIP)
113 OFFLOAD_CHECK(hipMemsetAsync(ptr, val, size, stream));
114#endif
115}
116
117/*******************************************************************************
118 * \brief Wrapper around cudaMemset.
119 ******************************************************************************/
120static inline void offloadMemset(void *ptr, const int val, size_t size) {
121#if defined(__OFFLOAD_CUDA)
122 OFFLOAD_CHECK(cudaMemset(ptr, val, size));
123#elif defined(__OFFLOAD_HIP)
124 OFFLOAD_CHECK(hipMemset(ptr, val, size));
125#endif
126}
127
128/*******************************************************************************
129 * \brief Wrapper around cudaMemcpyAsync(...,cudaMemcpyHostToDevice,...).
130 ******************************************************************************/
131static inline void offloadMemcpyAsyncHtoD(void *ptr_dev, const void *ptr_hst,
132 const size_t size,
133 offloadStream_t stream) {
134#if defined(__OFFLOAD_UNIFIED_MEMORY)
135 if (ptr_dev == ptr_hst) { /* fast-path only sensible without offsets */
136 return;
137 }
138#endif
139#if defined(__OFFLOAD_CUDA)
140 OFFLOAD_CHECK(
141 cudaMemcpyAsync(ptr_dev, ptr_hst, size, cudaMemcpyHostToDevice, stream));
142#elif defined(__OFFLOAD_HIP)
143 OFFLOAD_CHECK(
144 hipMemcpyAsync(ptr_dev, ptr_hst, size, hipMemcpyHostToDevice, stream));
145#endif
146}
147
148/*******************************************************************************
149 * \brief Wrapper around cudaMemcpyAsync(...,cudaMemcpyDeviceToHost,...).
150 ******************************************************************************/
151static inline void offloadMemcpyAsyncDtoH(void *ptr_hst, const void *ptr_dev,
152 const size_t size,
153 const offloadStream_t stream) {
154#if defined(__OFFLOAD_UNIFIED_MEMORY)
155 if (ptr_hst == ptr_dev) { /* fast-path only sensible without offsets */
156 return;
157 }
158#endif
159#if defined(__OFFLOAD_CUDA)
160 OFFLOAD_CHECK(
161 cudaMemcpyAsync(ptr_hst, ptr_dev, size, cudaMemcpyDeviceToHost, stream));
162#elif defined(__OFFLOAD_HIP)
163 OFFLOAD_CHECK(
164 hipMemcpyAsync(ptr_hst, ptr_dev, size, hipMemcpyDeviceToHost, stream));
165#endif
166}
167
168/*******************************************************************************
169 * \brief Wrapper around cudaMemcpyAsync(...,cudaMemcpyDeviceToDevice).
170 ******************************************************************************/
171static inline void offloadMemcpyAsyncDtoD(void *dst, const void *src,
172 const size_t size,
173 const offloadStream_t stream) {
174#if defined(__OFFLOAD_CUDA)
175 OFFLOAD_CHECK(
176 cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, stream));
177#elif defined(__OFFLOAD_HIP)
178 OFFLOAD_CHECK(
179 hipMemcpyAsync(dst, src, size, hipMemcpyDeviceToDevice, stream));
180#endif
181}
182
183/*******************************************************************************
184 * \brief Wrapper around cudaMemcpy(...,cudaMemcpyHostToDevice).
185 ******************************************************************************/
186static inline void offloadMemcpyHtoD(void *ptr_dev, const void *ptr_hst,
187 const size_t size) {
188#if defined(__OFFLOAD_UNIFIED_MEMORY)
189 if (ptr_dev == ptr_hst) { /* fast-path only sensible without offsets */
190 return;
191 }
192#endif
193#if defined(__OFFLOAD_CUDA)
194 OFFLOAD_CHECK(cudaMemcpy(ptr_dev, ptr_hst, size, cudaMemcpyHostToDevice));
195#elif defined(__OFFLOAD_HIP)
196 OFFLOAD_CHECK(hipMemcpy(ptr_dev, ptr_hst, size, hipMemcpyHostToDevice));
197#endif
198}
199
200/*******************************************************************************
201 * \brief Wrapper around cudaMemcpy(...,cudaMemcpyDeviceToHost).
202 ******************************************************************************/
203static inline void offloadMemcpyDtoH(void *ptr_dev, const void *ptr_hst,
204 const size_t size) {
205#if defined(__OFFLOAD_UNIFIED_MEMORY)
206 if (ptr_dev == ptr_hst) { /* fast-path only sensible without offsets */
207 return;
208 }
209#endif
210#if defined(__OFFLOAD_CUDA)
211 OFFLOAD_CHECK(cudaMemcpy(ptr_dev, ptr_hst, size, cudaMemcpyDeviceToHost));
212#elif defined(__OFFLOAD_HIP)
213 OFFLOAD_CHECK(hipMemcpy(ptr_dev, ptr_hst, size, hipMemcpyDeviceToHost));
214#endif
215}
216
217/*******************************************************************************
218 * \brief Wrapper around cudaMemcpyToSymbol.
219 ******************************************************************************/
220static inline void offloadMemcpyToSymbol(const void *symbol, const void *src,
221 const size_t count) {
222#if defined(__OFFLOAD_CUDA)
223 OFFLOAD_CHECK(
224 cudaMemcpyToSymbol(symbol, src, count, 0, cudaMemcpyHostToDevice));
225#elif defined(__OFFLOAD_HIP)
226 OFFLOAD_CHECK(
227 hipMemcpyToSymbol(symbol, src, count, 0, hipMemcpyHostToDevice));
228#endif
229}
230
231/*******************************************************************************
232 * \brief Wrapper around cudaEventCreate.
233 ******************************************************************************/
234static inline void offloadEventCreate(offloadEvent_t *event) {
235#if defined(__OFFLOAD_CUDA)
236 OFFLOAD_CHECK(cudaEventCreate(event));
237#elif defined(__OFFLOAD_HIP)
238 OFFLOAD_CHECK(hipEventCreate(event));
239#endif
240}
241
242/*******************************************************************************
243 * \brief Wrapper around cudaEventDestroy.
244 ******************************************************************************/
245static inline void offloadEventDestroy(offloadEvent_t event) {
246#if defined(__OFFLOAD_CUDA)
247 OFFLOAD_CHECK(cudaEventDestroy(event));
248#elif defined(__OFFLOAD_HIP)
249 OFFLOAD_CHECK(hipEventDestroy(event));
250#endif
251}
252
253/*******************************************************************************
254 * \brief Wrapper around cudaStreamCreate.
255 ******************************************************************************/
256static inline void offloadStreamCreate(offloadStream_t *stream) {
257#if defined(__OFFLOAD_CUDA)
258 OFFLOAD_CHECK(cudaStreamCreate(stream));
259#elif defined(__OFFLOAD_HIP)
260 OFFLOAD_CHECK(hipStreamCreate(stream));
261#endif
262}
263
264/*******************************************************************************
265 * \brief Wrapper around cudaStreamDestroy.
266 ******************************************************************************/
267static inline void offloadStreamDestroy(offloadStream_t stream) {
268#if defined(__OFFLOAD_CUDA)
269 OFFLOAD_CHECK(cudaStreamDestroy(stream));
270#elif defined(__OFFLOAD_HIP)
271 OFFLOAD_CHECK(hipStreamDestroy(stream));
272#endif
273}
274
275/*******************************************************************************
276 * \brief Wrapper around cudaEventSynchronize.
277 ******************************************************************************/
278static inline void offloadEventSynchronize(offloadEvent_t event) {
279#if defined(__OFFLOAD_CUDA)
280 OFFLOAD_CHECK(cudaEventSynchronize(event));
281#elif defined(__OFFLOAD_HIP)
282 OFFLOAD_CHECK(hipEventSynchronize(event));
283#endif
284}
285
286/*******************************************************************************
287 * \brief Wrapper around cudaStreamSynchronize.
288 ******************************************************************************/
289static inline void offloadStreamSynchronize(offloadStream_t stream) {
290#if defined(__OFFLOAD_CUDA)
291 OFFLOAD_CHECK(cudaStreamSynchronize(stream));
292#elif defined(__OFFLOAD_HIP)
293 OFFLOAD_CHECK(hipStreamSynchronize(stream));
294#endif
295}
296
297/*******************************************************************************
298 * \brief Wrapper around cudaEventRecord.
299 ******************************************************************************/
300static inline void offloadEventRecord(offloadEvent_t event,
301 offloadStream_t stream) {
302#if defined(__OFFLOAD_CUDA)
303 OFFLOAD_CHECK(cudaEventRecord(event, stream));
304#elif defined(__OFFLOAD_HIP)
305 OFFLOAD_CHECK(hipEventRecord(event, stream));
306#endif
307}
308
309/*******************************************************************************
310 * \brief Wrapper around cudaMallocHost.
311 ******************************************************************************/
312static inline void offloadMallocHost(void **ptr, size_t size) {
313#if defined(__OFFLOAD_CUDA)
314 OFFLOAD_CHECK(cudaMallocHost(ptr, size));
315#elif defined(__OFFLOAD_HIP)
316 OFFLOAD_CHECK(hipHostMalloc(ptr, size, hipHostMallocDefault)); // inconsistent
317#else
318 assert(NULL != ptr);
319 *ptr = malloc(size);
320#endif
321}
322
323/*******************************************************************************
324 * \brief Wrapper around cudaMalloc.
325 ******************************************************************************/
326static inline void offloadMalloc(void **ptr, size_t size) {
327#if defined(__OFFLOAD_CUDA)
328 OFFLOAD_CHECK(cudaMalloc(ptr, size));
329#elif defined(__OFFLOAD_HIP)
330 OFFLOAD_CHECK(hipMalloc(ptr, size));
331#else
332 assert(NULL != ptr);
333 *ptr = NULL;
334#endif
335}
336
337/*******************************************************************************
338 * \brief Wrapper around cudaFree.
339 ******************************************************************************/
340static inline void offloadFree(void *ptr) {
341#if defined(__OFFLOAD_CUDA)
342 OFFLOAD_CHECK(cudaFree(ptr));
343#elif defined(__OFFLOAD_HIP)
344 OFFLOAD_CHECK(hipFree(ptr));
345#else
346 assert(NULL == ptr);
347#endif
348}
349
350/*******************************************************************************
351 * \brief Wrapper around cudaFreeHost.
352 ******************************************************************************/
353static inline void offloadFreeHost(void *ptr) {
354#if defined(__OFFLOAD_CUDA)
355 OFFLOAD_CHECK(cudaFreeHost(ptr));
356#elif defined(__OFFLOAD_HIP)
357 OFFLOAD_CHECK(hipHostFree(ptr)); // inconsistent
358#else
359 free(ptr);
360#endif
361}
362
363/*******************************************************************************
364 * \brief Wrapper around cudaStreamWaitEvent.
365 ******************************************************************************/
366static inline void offloadStreamWaitEvent(offloadStream_t stream,
367 offloadEvent_t event) {
368#if defined(__OFFLOAD_CUDA)
369 OFFLOAD_CHECK(cudaStreamWaitEvent(stream, event, 0 /*flags*/));
370#elif defined(__OFFLOAD_HIP)
371 OFFLOAD_CHECK(hipStreamWaitEvent(stream, event, 0 /*flags*/));
372#endif
373}
374
375/*******************************************************************************
376 * \brief Wrapper around cudaEventQuery.
377 ******************************************************************************/
378static inline bool offloadEventQuery(offloadEvent_t event) {
379#if defined(__OFFLOAD_CUDA)
380 return offloadSuccess == cudaEventQuery(event);
381#elif defined(__OFFLOAD_HIP)
382 return offloadSuccess == hipEventQuery(event);
383#endif
384}
385
386/*******************************************************************************
387 * \brief Wrapper around cudaDeviceSynchronize.
388 ******************************************************************************/
389static inline void offloadDeviceSynchronize(void) {
390#if defined(__OFFLOAD_CUDA)
391 OFFLOAD_CHECK(cudaDeviceSynchronize());
392#elif defined(__OFFLOAD_HIP)
393 OFFLOAD_CHECK(hipDeviceSynchronize());
394#endif
395}
396
397/*******************************************************************************
398 * \brief Wrapper around cudaDeviceSetLimit(cudaLimitMallocHeapSize,...).
399 ******************************************************************************/
400static inline void offloadEnsureMallocHeapSize(const size_t required_size) {
401#if defined(__OFFLOAD_CUDA)
402 size_t current_size;
403 OFFLOAD_CHECK(cudaDeviceGetLimit(&current_size, cudaLimitMallocHeapSize));
404 if (current_size < required_size) {
405 OFFLOAD_CHECK(cudaDeviceSetLimit(cudaLimitMallocHeapSize, required_size));
406 }
407#elif defined(__OFFLOAD_HIP) && (HIP_VERSION >= 50300000)
408 size_t current_size;
409 OFFLOAD_CHECK(hipDeviceGetLimit(&current_size, hipLimitMallocHeapSize));
410 if (current_size < required_size) {
411 OFFLOAD_CHECK(hipDeviceSetLimit(hipLimitMallocHeapSize, required_size));
412 }
413#else
414 (void)required_size; /* mark used */
415#endif
416}
417
418#endif /* !defined(__OFFLOAD_OPENCL) */
419
420#ifdef __cplusplus
421}
422#endif
423
424#endif // defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP) ||
425 // defined(__OFFLOAD_OPENCL)
426#endif